From da7a7d296f2f67ce2ccf77d2959a0b717d421689 Mon Sep 17 00:00:00 2001 From: zhengzhonghui Date: Tue, 17 Dec 2024 16:07:18 +0800 Subject: [PATCH] [AutoParallel] change loss_base after dropout support spmd (#9647) * [AutoParallel] change loss_base after dropout support spmd * [AutoParallel] change loss_base after dropout support spmd --- scripts/distribute/ci_case_auto.sh | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scripts/distribute/ci_case_auto.sh b/scripts/distribute/ci_case_auto.sh index 19c1a40da3d0..65f7c439299f 100755 --- a/scripts/distribute/ci_case_auto.sh +++ b/scripts/distribute/ci_case_auto.sh @@ -1778,11 +1778,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2() { ips=-1 mem=-1 echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5" - loss_base=10.59368134 + loss_base=10.59486389 # output of dropout is different after supporting spmd ips_base=-1 mem_base=-1 if [ $IS_A100 -ne 0 ];then - loss_base=10.60190201 + loss_base=10.60063553 # after add dropout spmd fi check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" @@ -1850,11 +1850,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2() { ips=-1 mem=-1 echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5" - loss_base=10.5913763 + loss_base=10.58862114 # output of dropout is different after supporting spmd ips_base=-1 mem_base=-1 if [ $IS_A100 -ne 0 ];then - loss_base=10.5915575 + loss_base=10.59354877 # after add dropout spmd fi check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" @@ -1923,11 +1923,11 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2() { mem=-1 echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5" # loss_base=10.59993172 # note: need to debug - loss_base=10.58103752 + loss_base=10.58122158 # output of dropout is different after supporting spmd ips_base=-1 mem_base=-1 if [ $IS_A100 -ne 0 ];then - loss_base=10.58719826 + loss_base=10.58605194 # after add dropout spmd fi check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ===========" @@ -1996,12 +1996,11 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2() { mem=-1 echo "result: loss=$loss ips=$ips mem=$mem loss_md5=$loss_md5" # loss_base=10.58456802 # note: need to debug - loss_base=10.58146572 + loss_base=10.58163357 ips_base=-1 mem_base=-1 if [ $IS_A100 -ne 0 ];then - # loss_base=10.58141422 # note: need to debug - loss_base=10.58743668 + loss_base=10.58635044 # after add dropout spmd fi check_result $FUNCNAME ${loss_base} ${loss} ${ips_base} ${ips} ${mem_base} ${mem} echo "=========== $FUNCNAME run end ==========="