From ba52bc00a9c3c0783cf2bafe9b890e933180acc2 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Dec 2024 11:24:31 +0800 Subject: [PATCH 01/17] fix activate parameter in fp16 --- .../core/providers/xnnpack/detail/utils.cc | 15 +++++-- .../xnnpack/xnnpack_execution_provider.cc | 2 +- .../providers/xnnpack/xnnpack_basic_test.cc | 44 +++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc index 2adf8339b4b66..3afd5d0970049 100644 --- a/onnxruntime/core/providers/xnnpack/detail/utils.cc +++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc @@ -9,6 +9,7 @@ #include "core/common/common.h" #include "core/common/safeint.h" +#include "core/framework/float16.h" #include "core/framework/node_unit.h" #include "core/framework/tensorprotoutils.h" #include "core/graph/graph.h" @@ -267,9 +268,17 @@ std::unique_ptr FuseActivation(const NodeUnit& node_un ORT_ENFORCE(utils::HasExternalData(value) == false, "External data is not supported for the scalar min/max Clip values"); - value_to_set = utils::HasRawData(value) - ? *reinterpret_cast(value.raw_data().data()) - : value.float_data()[0]; + int32_t arg_type; + if (GetType(arg, arg_type) && arg_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) { + // arg is of type FP16 + value_to_set = utils::HasRawData(value) + ? (*reinterpret_cast(value.raw_data().data())).ToFloat() + : value.float_data()[0]; + } else { + value_to_set = utils::HasRawData(value) + ? *reinterpret_cast(value.raw_data().data()) + : value.float_data()[0]; + } } } }; diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc index ee4e7be0f1f49..23f4c62472615 100644 --- a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc +++ b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc @@ -157,7 +157,7 @@ using namespace xnnpack; XnnpackExecutionProvider::XnnpackExecutionProvider(const XnnpackExecutionProviderInfo& info) : IExecutionProvider{kXnnpackExecutionProvider} { int xnn_thread_pool_size = info.xnn_thread_pool_size; - int ort_thread_pool_size = info.session_options ? info.session_options->intra_op_param.thread_pool_size : 1; + int ort_thread_pool_size = info.session_options ? 
info.session_options->intra_op_param.thread_pool_size : 0; bool allow_intra_op_spinning = (info.session_options == nullptr) || (info.session_options && info.session_options->config_options.GetConfigOrDefault( diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 65db81e7f4013..1fd0bbfb36835 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -6,6 +6,7 @@ #include "core/common/logging/logging.h" #include "core/common/span_utils.h" +#include "core/framework/float16.h" #include "core/framework/utils.h" #include "core/graph/graph.h" #include "core/providers/xnnpack/xnnpack_execution_provider.h" @@ -89,6 +90,49 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion) { RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluClipFusion", std::move(ep), feeds, params); } +TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { + const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_conv_clip_relu_fp16.onnx"; + + RandomValueGenerator generator; + TensorShape input_shape_x{1, 16, 16, 192}; + std::vector input_x = generator.Uniform(input_shape_x.GetDims(), -128, 128); + + OrtValue ml_value_x; + CreateMLValue(input_shape_x.GetDims(), input_x.data(), OrtMemoryInfo(), &ml_value_x); + + NameMLValMap feeds; + feeds.insert(std::make_pair("model_input", ml_value_x)); + + std::function verify = [](const Graph& graph) -> void { + ASSERT_EQ(graph.NumberOfNodes(), 3) << "Transpose nodes should have been removed, and " + "Conv+Relu and Conv+Clip should have been fused, leaving 3 nodes."; + auto node_iter = graph.Nodes().begin(); + auto check_node = [](const Node& node, const std::string& fusion_type) { + const auto& attr = node.GetAttributes(); + auto activation = attr.find("activation"); + ASSERT_NE(activation, attr.cend()) << "Fused node should have activation attribute"; + ASSERT_EQ(activation->second.s(), fusion_type); + }; + + // check 2nd and 3rd nodes. + // the first node is the Conv that does not get fused (created after first call to GetCapability) + // the 2nd and 3rd nodes are the fused nodes (created after second call to GetCapability) + ++node_iter; + check_node(*node_iter, "Clip"); + ++node_iter; + check_node(*node_iter, "Relu"); + }; + + EPVerificationParams params; + params.ep_node_assignment = ExpectedEPNodeAssignment::All; + params.fp32_abs_err = 0.0002f; + params.graph_verifier = &verify; + + auto ep = DefaultXnnpackExecutionProvider(); + // So far, CPU EP doensn't support Fp16 Conv fusion, so verify_outputs is skipped. 
+ RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluClipFusion_FP16", std::move(ep), feeds, params, {}, false); +} + // test we can share the cpu ep allocator with the xnnpack EP TEST(XnnpackEP, TestAllocatorSharing) { auto init_session = [](std::vector>& eps, From 6032820b52e90058be2c3da76a479879313b0646 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Dec 2024 11:38:38 +0800 Subject: [PATCH 02/17] add test data --- .../test/testdata/nhwc_conv_clip_relu_fp16.onnx | Bin 0 -> 49001 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 onnxruntime/test/testdata/nhwc_conv_clip_relu_fp16.onnx diff --git a/onnxruntime/test/testdata/nhwc_conv_clip_relu_fp16.onnx b/onnxruntime/test/testdata/nhwc_conv_clip_relu_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1621d8b17afd5c1fc4e947a1877f80242f6ab806 GIT binary patch literal 49001 zcmb5VcYM{xwl)l=8B3$4!|qVccX(DXLV`htDBl--cBhKVihkaf8RCO`JNVM&18$kwlP# zCyq=XKXuB;5%nweOdpyydE$i0BVANX#s9vrg@={>drb3E&C4_|-8>|_bcNu=rNUm9 z4*TDx3adM8+=Ss{2Q!05jGQ=S^t3UPM-J{iIBn##|MZ~c-w$fk96li}W5T$pQ^rh4 z8$4m^l%S+k{qTRACZ=qw;5b{2u!gWNeeD}^5~%xM+Rjt^Y^IgVHGA%Ngp#}A%p*(Gq~X3&wsr7bLp`EuTzHA{@0Wf(g zs{h|^|F6m?>sR}~rue7o{}*-rryZ&MuO0c18}iR>``fzyrRSio|F1pzuR;G<{+`nR zN%dq{wZAQg8sYq}-A*FHs{HfDurWg?ClNJj{JUkhxo0+{(~Sj8crTLY3kpGFm#;rpJM+H!zkB1Xene&*}s4NLkDBR{$Ky0 zM(v4HCyyRHMTlMN;E&tob zQ$tFXvK2K5-CsJibZD8-GQns6o5QEhYp{>52Fnt&;9~Y3IfLv*-^Rwl?aWr98{3>) zM9rZN(eLqesiEA3pCVh862Z+k7iW9NXvYzfcIadMUzIfFdu@?u*QXmtj>@reXXQLUoo)nP<1x_1aAY;23R%jr4-gC_%du0qZuBlH5)|Oc z+ECO({EcQ{4JrjnjFrZ;_WDIRK`-O~sPqZGYq;F-b|yET`pAK#ouDR!jFVtf5?l|-GVa}hQ2&qGALcXloxEQO7b3Mf z=o_h;vO}>89<)G@Q8wxgjMmz3as};AzL^@MZQ(1SlPF!BB#n~Gd+TW5qqpJ-bYEX0 zZ$4iAQDZ~`#px&HwsN|XA^a@OREqgFY8q-M#fY5z)wfJZ*L1N&WAv_a6X~K% z@O97>eS|VVpXxuOEtYp{Md#mMvnm+@CK8D#8=aL9huU5jJu=`4ZbkZl~+wGHrv)ogw%s=UQ7&^{na< zDxg8Ak6c?WREG1*=(k`C@c_PK*U(d_Az%*i)@-S@O%(lBwmN%Dj$pe`Tj)r*g9w8- zbBAz}t*9}M-C})Zvu~X8MLQ^1_yyv#qVn(p%p~SB8(BYfgY?qfouv&#N72*$Js1IA zmY$Fg@Cl$0t|nT;&CD>e2DyMdg7sEQwCbXU9_p2pd(vY$-FKDMxL4F9SEr=cU?n`s zEGHZ4z0IL$ESNw=SF1CtJ{}Gab~27w~0p64QZ*Bs2oqVyTUEF}|KY z1{xAu@Nh!2V|t0Am)rAO}_^n#yyPxxyVKTtlKJCdaLljpWt(dt;OGqw+zmE)GkMf$7%E+(~n?`6)3y5QA{jK{f{i&DU@@Q(lXp zpOGV6X-Sc=v-*=hNe(k)KZC}j#VB3@`V+K8d#%*eL}4C%1}r5S!N$y4vKe`r(i|>z zs;e!tuJ{TFV}CS1Yqt_Ff;micvOK$wTStwdx`JWYF}e$EL)hQ}<{TMI4xxJDquB1; zJu026=-LTPP=;Zs0R~|vYpWbt)NTA{P>btFO>li1l&4x!qvU0bpPj~)gEa%q*hXCF zo~=v;SNVW7Vk+zh@|k?Ht=3){z;Dy;C^vlN;0t()xB;`6r{p7Y1l7+uqPXRrU;P~0 zAN*+zb78Js-~{~$$j&k_p1DPoC%zIf*l5^}sYd1v zu-j8!OV`>;c61kAlJCn=>L_6|br$R)rbEP@rK71iAe;13gXr##Mqm~wgI{qrBMfV+ zU=;o6ya%eXr^#i+a%#AvEn4Y1!&E6=N1le~jB`qK%xkqdYAiq1enqKDw%lEH3ghI< z%5q`3x&zIU=Sv-wsbUOd8HJd^{L1#FJY;|>!luw2KpVUnJP4N%!^j9K&v68FQueB! 
[... remainder of the base85-encoded binary payload for nhwc_conv_clip_relu_fp16.onnx (49001 bytes) omitted ...]

literal 0
HcmV?d00001

From 242c1821fd2b0f04b7529001d74fb9913975d58c Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Mon, 16 Dec 2024 11:42:21 +0800
Subject: [PATCH 03/17] rm useless change

---
 .../core/providers/xnnpack/xnnpack_execution_provider.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc
index 23f4c62472615..ee4e7be0f1f49 100644
--- a/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc
+++ b/onnxruntime/core/providers/xnnpack/xnnpack_execution_provider.cc
@@ -157,7 +157,7 @@ using namespace xnnpack;
 XnnpackExecutionProvider::XnnpackExecutionProvider(const XnnpackExecutionProviderInfo& info)
     : IExecutionProvider{kXnnpackExecutionProvider} {
   int xnn_thread_pool_size = info.xnn_thread_pool_size;
-  int ort_thread_pool_size = info.session_options ? info.session_options->intra_op_param.thread_pool_size : 0;
+  int ort_thread_pool_size = info.session_options ? 
info.session_options->intra_op_param.thread_pool_size : 1; bool allow_intra_op_spinning = (info.session_options == nullptr) || (info.session_options && info.session_options->config_options.GetConfigOrDefault( From 7c7f16a2deeceea8d48555f9af61a5ff1aed0a4d Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Dec 2024 13:59:13 +0800 Subject: [PATCH 04/17] node assignment some for FP16 --- onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 1fd0bbfb36835..5bc4dbf53c157 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -124,7 +124,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { }; EPVerificationParams params; - params.ep_node_assignment = ExpectedEPNodeAssignment::All; + params.ep_node_assignment = ExpectedEPNodeAssignment::Some; params.fp32_abs_err = 0.0002f; params.graph_verifier = &verify; From 3d756968d095abddd8b5d1c2aaff213cdd25e782 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Dec 2024 14:01:13 +0800 Subject: [PATCH 05/17] update --- onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 5bc4dbf53c157..40179f2614f38 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -124,6 +124,8 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { }; EPVerificationParams params; + // TODO: make it to ExpectedEPNodeAssignment::All if more Fp16 ops are added. + // Now, set it to Some to pass the test in iOS pipeline params.ep_node_assignment = ExpectedEPNodeAssignment::Some; params.fp32_abs_err = 0.0002f; params.graph_verifier = &verify; From c4f04556001ed4e3406971f09ef91aaa4b66f669 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 16 Dec 2024 22:31:04 +0800 Subject: [PATCH 06/17] update --- onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 40179f2614f38..29764936bf3a7 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -90,6 +90,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion) { RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluClipFusion", std::move(ep), feeds, params); } +#ifdef XNNPACK_FP16_SUPPORTED TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_conv_clip_relu_fp16.onnx"; @@ -124,9 +125,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { }; EPVerificationParams params; - // TODO: make it to ExpectedEPNodeAssignment::All if more Fp16 ops are added. - // Now, set it to Some to pass the test in iOS pipeline - params.ep_node_assignment = ExpectedEPNodeAssignment::Some; + params.ep_node_assignment = ExpectedEPNodeAssignment::All; params.fp32_abs_err = 0.0002f; params.graph_verifier = &verify; @@ -134,6 +133,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { // So far, CPU EP doensn't support Fp16 Conv fusion, so verify_outputs is skipped. 
RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluClipFusion_FP16", std::move(ep), feeds, params, {}, false); } +#endif // test we can share the cpu ep allocator with the xnnpack EP TEST(XnnpackEP, TestAllocatorSharing) { From dd9865f90993016f08dad2dad9c369dd24780233 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 17 Dec 2024 07:25:46 +0800 Subject: [PATCH 07/17] head file --- onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 29764936bf3a7..33de560cb135a 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -10,6 +10,7 @@ #include "core/framework/utils.h" #include "core/graph/graph.h" #include "core/providers/xnnpack/xnnpack_execution_provider.h" +#include "core/providers/xnnpack/xnnpack_init.h" #include "core/session/inference_session.h" #include "core/session/onnxruntime_cxx_api.h" #include "core/session/onnxruntime_session_options_config_keys.h" From d556acbb7261b57abd1124b762cf80a99585700e Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 23 Dec 2024 10:55:28 +0800 Subject: [PATCH 08/17] update --- .../core/providers/cpu/fp16/fp16_activations.h | 5 ++++- onnxruntime/core/providers/xnnpack/detail/utils.cc | 11 ++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/cpu/fp16/fp16_activations.h b/onnxruntime/core/providers/cpu/fp16/fp16_activations.h index 5404a1b180b64..7ee7af410eb8b 100644 --- a/onnxruntime/core/providers/cpu/fp16/fp16_activations.h +++ b/onnxruntime/core/providers/cpu/fp16/fp16_activations.h @@ -74,7 +74,10 @@ struct LeakyRelu : public ElementWiseRangedTransform { // TODO Add the following activations: // MlasTanhActivation, // MlasLogisticActivation, -// MlasClipActivation, +// MlasClipActivation, +// Once it's added, please update TestNhwcConvReluClipFusion_FP16 +// in xnnpack_basic_test.cc +// to enable outputs verification for Clip activation. // MlasHardSigmoidActivation, } // namespace functors diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc index 3afd5d0970049..466f4094dd2ea 100644 --- a/onnxruntime/core/providers/xnnpack/detail/utils.cc +++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc @@ -241,12 +241,12 @@ std::unique_ptr FuseActivation(const NodeUnit& node_un def.attributes = node_unit.GetNode().GetAttributes(); // use infinity as the default as that's what xnnpack uses if min/max are not set - float min = -std::numeric_limits::infinity(); + float min = std::numeric_limits::infinity(); float max = std::numeric_limits::infinity(); const auto& activation_type = activation.OpType(); if (activation_type == "Clip") { - min = std::numeric_limits::min(); + min = std::numeric_limits::lowest(); max = std::numeric_limits::max(); bool min_max_are_attributes = activation.SinceVersion() == 1 || activation.SinceVersion() == 6; @@ -274,10 +274,15 @@ std::unique_ptr FuseActivation(const NodeUnit& node_un value_to_set = utils::HasRawData(value) ? (*reinterpret_cast(value.raw_data().data())).ToFloat() : value.float_data()[0]; - } else { + } else if (GetType(arg, arg_type) && arg_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { value_to_set = utils::HasRawData(value) ? *reinterpret_cast(value.raw_data().data()) : value.float_data()[0]; + } else { + // double isn't currently supported. 
+ // And input and output of Clip must be float number. + // https://onnx.ai/onnx/operators/onnx__Clip.html + ORT_NOT_IMPLEMENTED("Clip min/max must be FP16 or FP32"); } } } From a4dac51df4bee05107b563ea45e282b3ffa6489a Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 23 Dec 2024 16:07:11 +0800 Subject: [PATCH 09/17] update1 --- .../test/providers/cpu/nn/conv_fp16_test.cc | 27 +++++++++-- .../providers/xnnpack/xnnpack_basic_test.cc | 45 +++++++++++++++++- .../test/testdata/conv_relu_model_fp16.onnx | Bin 0 -> 4032 bytes 3 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 onnxruntime/test/testdata/conv_relu_model_fp16.onnx diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc index 4253e36e02548..4c0968241e14d 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc @@ -26,6 +26,7 @@ struct ConvOpAndTestAttributes { std::unordered_set excluded_providers; string activation = ""; vector activation_parameters = {}; + string domain = onnxruntime::kMSDomain; }; /* @@ -48,7 +49,19 @@ void TestConvFp16Op(const ConvOpAndTestAttributes& attributes, int opset = 11) { std::unique_ptr tester; if (!attributes.activation.empty()) { - tester = std::make_unique("NhwcFusedConv", 1, onnxruntime::kMSDomain); + std::string_view op; + if (attributes.domain == onnxruntime::kMSDomain) { + op = "NhwcFusedConv"; + tester = std::make_unique(op, 1, attributes.domain); + } else if (attributes.domain == onnxruntime::kMSInternalNHWCDomain) { + op = "Conv"; + tester = std::make_unique(op, opset, attributes.domain); + } else if (attributes.domain == onnxruntime::kOnnxDomain) { + op = "FusedConv"; + } else { + ORT_THROW("Unsupported domain: ", attributes.domain); + } + tester->AddAttribute("activation", attributes.activation); if (!attributes.activation_parameters.empty()) { @@ -1127,7 +1140,7 @@ TEST(ConvFp16Test, Pointwise_Relu) { vector{1, 1}, // kernel_shape vector{0, 0, 0, 0}, // pads vector{1, 1}, // strides - {}, // excluded EPs + {kXnnpackExecutionProvider}, // excluded EPs "Relu" // activation }; @@ -1157,8 +1170,14 @@ TEST(ConvFp16Test, Pointwise_Relu) { MLFloat16(0.f), MLFloat16(0.f), MLFloat16(17.5f), MLFloat16(9.5f)}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto run_test = [&](const ConvOpAndTestAttributes& test_attrs) { + TestConvFp16Op(test_attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(test_attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + }; + run_test(attrs); + attrs.domain = kMSInternalNHWCDomain; + attrs.excluded_providers = {kCpuExecutionProvider}; + run_test(attrs); } TEST(ConvFp16Test, Conv2D_HardSigmoid) { diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 33de560cb135a..4a6f63961ee06 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -92,6 +92,47 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion) { } #ifdef XNNPACK_FP16_SUPPORTED +// This test can be removed if Mlas implemented FP16 Clip fusion. 
+// Now TestNhwcConvReluClipFusion_FP16 skipped output verification +TEST(XnnpackEP, TestNhwcConvReluFusion_FP16) { + const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "conv_relu_model_fp16.onnx"; + + RandomValueGenerator generator; + TensorShape input_shape_x{1, 16, 16, 192}; + std::vector input_x = generator.Uniform(input_shape_x.GetDims(), -128, 128); + + OrtValue ml_value_x; + CreateMLValue(input_shape_x.GetDims(), input_x.data(), OrtMemoryInfo(), &ml_value_x); + + NameMLValMap feeds; + feeds.insert(std::make_pair("model_input", ml_value_x)); + + std::function verify = [](const Graph& graph) -> void { + ASSERT_EQ(graph.NumberOfNodes(), 2) << "Transpose nodes should have been removed, and " + "Conv+Relu should have been fused, leaving 2 nodes."; + auto node_iter = graph.Nodes().begin(); + auto check_node = [](const Node& node, const std::string& fusion_type) { + const auto& attr = node.GetAttributes(); + auto activation = attr.find("activation"); + ASSERT_NE(activation, attr.cend()) << "Fused node should have activation attribute"; + ASSERT_EQ(activation->second.s(), fusion_type); + }; + + ++node_iter; + check_node(*node_iter, "Relu"); + }; + + EPVerificationParams params; + params.ep_node_assignment = ExpectedEPNodeAssignment::Some; + params.fp32_abs_err = 0.5f; + params.graph_verifier = &verify; + + auto ep = DefaultXnnpackExecutionProvider(); + // So far, CPU EP doensn't support Fp16 Conv fusion, so verify_outputs is skipped. + RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluFusion_FP16", std::move(ep), feeds, params); +}; + +// Now, this Test is mainly check whether Xnnpack Clip fusion works. TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_conv_clip_relu_fp16.onnx"; @@ -126,8 +167,8 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { }; EPVerificationParams params; - params.ep_node_assignment = ExpectedEPNodeAssignment::All; - params.fp32_abs_err = 0.0002f; + params.ep_node_assignment = ExpectedEPNodeAssignment::Some; + params.fp32_abs_err = 0.5f; params.graph_verifier = &verify; auto ep = DefaultXnnpackExecutionProvider(); diff --git a/onnxruntime/test/testdata/conv_relu_model_fp16.onnx b/onnxruntime/test/testdata/conv_relu_model_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..872adfde8d008185da1529429fd5d1c64370de2c GIT binary patch literal 4032 zcmai1d2|)UnuqLMKtq5?62eU&xw%W+h>m^*i4e z6CD>_@ye|7%EIErNW?3=nB}orhFDt9ET39bI;o_rqIys!|A^rV=d8Dvnorb7FG2P`zKT(g!Ippf8vOX`e)Ul znk$P+tA*Uq)-~es9%q?ZP*OH&T4g~+v2$xu%*QQbTE>LOgvVO4|Fycoi;`ZcFKQT{ zI5F5PU1p=@9qLy2HrNLB6;9v^{#7|QJ%;om>CC2HA-}>vc_ln$y$%<#HSh}_fTl?2 z*pc85?2(#kV!YWgEI)^S`emlx7?hN6swRP$5#Ft7?9NVmnw z zyby-eCs0G8=mNVUt#wXvKMXGBiqPNH@p>4pr`MS-S%MqAVE~dB=i+$OmL+Jp@)flM z>=0t59O;R`WyTrf1MT26^mH!KUWS)!>*;o1UEOQ)I?&()8)L)exps{viM{3PIEISu z8*Z=tH8h_c!@SX+OJFtlAo(5U7?afehIh$+{Xd*zjo;`QJP}*X7QD;X3SPl4vr~?q z?uk%KSHcIdKiCtU;-k?uF)C=KLu4z?b<}9H*)rF7JsvxaC*?b`3*DE`Xz$YRB`>_H z-4dIO4rWi&$K5ilkZznu`Su_6wt-Ht&K08{bB}}rw1T}67)1KANF|e8B!2_b_9~i+ zmqVQNCg<0hNgtVU*Sv?ZpA3~G^JleE?XQhwyJ%Z>hedlw>0#;>c}KdBwLvc#?agvp zvx%@0F;PuL)#%sYI`k_)nagn&lPGKx;^1>3PfHQELj&K_9AP%W3FLvxU31<09pUT= z$#5i+XOsc#YRdc2C3pzrj2pxaJ>UiSf+nDgEE|1Krl*&14KQ4bl)r+L={8mmv$XH% zLSyT0i@i#0X@)qR@%hTCen#(VTIE+9=QyX*vO?AO$MlE)ti z22f5Q3@=5)%&n|k%aJeW-oW!rAU2GdRlfwXepJmD3!|?Cq zL$N{di8<*J^gU=K5we7RtU6^D>bg1^^=1z=@Z1QN>>dY&{3VED`x56g-uItFJy5*! zzT7IXjV%oxfGm=LEvU(T%eNFpv-7FZ-A0n{-nFJ>nLmra2m_UB?2EhoWQ4? 
[... remainder of the base85-encoded binary payload for conv_relu_model_fp16.onnx (4032 bytes) omitted ...]

literal 0
HcmV?d00001

From ee9819041db843d4dfdca101a8e0f5566452c876 Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Mon, 23 Dec 2024 16:10:15 +0800
Subject: [PATCH 10/17] rename

---
 .../test/providers/xnnpack/xnnpack_basic_test.cc | 2 +-
 ...del_fp16.onnx => nhwc_conv_relu_model_fp16.onnx} | Bin
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename onnxruntime/test/testdata/{conv_relu_model_fp16.onnx => nhwc_conv_relu_model_fp16.onnx} (100%)

diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
index 4a6f63961ee06..f378c1fcc0a14 100644
--- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
+++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
@@ -95,7 +95,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion) {
 #ifdef XNNPACK_FP16_SUPPORTED
 // This test can be removed if Mlas implemented FP16 Clip fusion. 
// Now TestNhwcConvReluClipFusion_FP16 skipped output verification TEST(XnnpackEP, TestNhwcConvReluFusion_FP16) { - const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "conv_relu_model_fp16.onnx"; + const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_conv_relu_model_fp16.onnx"; RandomValueGenerator generator; TensorShape input_shape_x{1, 16, 16, 192}; diff --git a/onnxruntime/test/testdata/conv_relu_model_fp16.onnx b/onnxruntime/test/testdata/nhwc_conv_relu_model_fp16.onnx similarity index 100% rename from onnxruntime/test/testdata/conv_relu_model_fp16.onnx rename to onnxruntime/test/testdata/nhwc_conv_relu_model_fp16.onnx From 52d099abdd2d7be57d1f3fed596004780f2d6dd6 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 23 Dec 2024 16:12:26 +0800 Subject: [PATCH 11/17] typo and lint --- onnxruntime/core/providers/cpu/fp16/fp16_activations.h | 2 +- onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc | 6 +++--- onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/cpu/fp16/fp16_activations.h b/onnxruntime/core/providers/cpu/fp16/fp16_activations.h index 7ee7af410eb8b..1a7727921877b 100644 --- a/onnxruntime/core/providers/cpu/fp16/fp16_activations.h +++ b/onnxruntime/core/providers/cpu/fp16/fp16_activations.h @@ -74,7 +74,7 @@ struct LeakyRelu : public ElementWiseRangedTransform { // TODO Add the following activations: // MlasTanhActivation, // MlasLogisticActivation, -// MlasClipActivation, +// MlasClipActivation, // Once it's added, please update TestNhwcConvReluClipFusion_FP16 // in xnnpack_basic_test.cc // to enable outputs verification for Clip activation. diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc index 4c0968241e14d..46ffaf6737d73 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc @@ -56,12 +56,12 @@ void TestConvFp16Op(const ConvOpAndTestAttributes& attributes, } else if (attributes.domain == onnxruntime::kMSInternalNHWCDomain) { op = "Conv"; tester = std::make_unique(op, opset, attributes.domain); - } else if (attributes.domain == onnxruntime::kOnnxDomain) { + } else if (attributes.domain == onnxruntime::kOnnxDomain) { op = "FusedConv"; } else { ORT_THROW("Unsupported domain: ", attributes.domain); } - + tester->AddAttribute("activation", attributes.activation); if (!attributes.activation_parameters.empty()) { @@ -1176,7 +1176,7 @@ TEST(ConvFp16Test, Pointwise_Relu) { }; run_test(attrs); attrs.domain = kMSInternalNHWCDomain; - attrs.excluded_providers = {kCpuExecutionProvider}; + attrs.excluded_providers = {kCpuExecutionProvider}; run_test(attrs); } diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index f378c1fcc0a14..f736dab0d69f6 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -128,7 +128,6 @@ TEST(XnnpackEP, TestNhwcConvReluFusion_FP16) { params.graph_verifier = &verify; auto ep = DefaultXnnpackExecutionProvider(); - // So far, CPU EP doensn't support Fp16 Conv fusion, so verify_outputs is skipped. 
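+  // MLAS does provide an FP16 Relu activation (see fp16_activations.h), so output
+  // verification can stay enabled here; the Clip fusion is still on the MLAS TODO list.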
RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluFusion_FP16", std::move(ep), feeds, params); }; @@ -172,7 +171,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion_FP16) { params.graph_verifier = &verify; auto ep = DefaultXnnpackExecutionProvider(); - // So far, CPU EP doensn't support Fp16 Conv fusion, so verify_outputs is skipped. + // So far, CPU EP doesn't support Fp16 Conv fusion, so verify_outputs is skipped. RunAndVerifyOutputsWithEP(ort_model_path, "TestNhwcConvReluClipFusion_FP16", std::move(ep), feeds, params, {}, false); } #endif From 3cc345de0a116e2b00742e45aa0df6616f70f961 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 23 Dec 2024 16:21:36 +0800 Subject: [PATCH 12/17] revert some changes --- .../core/providers/xnnpack/detail/utils.cc | 2 +- .../test/providers/cpu/nn/conv_fp16_test.cc | 1447 +++++++---------- 2 files changed, 550 insertions(+), 899 deletions(-) diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc index 466f4094dd2ea..a338bb1689ea3 100644 --- a/onnxruntime/core/providers/xnnpack/detail/utils.cc +++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc @@ -241,7 +241,7 @@ std::unique_ptr FuseActivation(const NodeUnit& node_un def.attributes = node_unit.GetNode().GetAttributes(); // use infinity as the default as that's what xnnpack uses if min/max are not set - float min = std::numeric_limits::infinity(); + float min = -std::numeric_limits::infinity(); float max = std::numeric_limits::infinity(); const auto& activation_type = activation.OpType(); diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc index 46ffaf6737d73..3569b4a6cf299 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc @@ -1,14 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. - -#include "core/mlas/inc/mlas.h" - -#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) || defined(COREML_ENABLE_MLPROGRAM) || defined(USE_XNNPACK) - +#include "core/graph/constants.h" #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" -#include "test/providers/run_options_config_keys.h" -#include "default_providers.h" using namespace std; namespace onnxruntime { @@ -24,143 +18,71 @@ struct ConvOpAndTestAttributes { vector pads; vector strides; std::unordered_set excluded_providers; - string activation = ""; - vector activation_parameters = {}; - string domain = onnxruntime::kMSDomain; }; -/* -Please notice that, we have predefined macros in the head of the file -#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) || defined(COREML_ENABLE_MLPROGRAM) -When we have these two macro defines, this UT will turn into green light and work. - -If attributes.activation is set the NhwcFusedConv contrib op is used. -If you are adding support for a new EP to the test and the EP does not support NhwcFusedConv -please add the EP to the excluded_providers list. 
-*/
-void TestConvFp16Op(const ConvOpAndTestAttributes& attributes,
-                    const vector<vector<MLFloat16>>& inputs,
-                    const vector<vector<int64_t>>& input_shapes,
-                    const std::initializer_list<MLFloat16>& expected_output,
-                    const vector<int64_t>& expected_output_shape,
-                    bool weight_is_initializer = false,
-                    OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess,
-                    const std::string& err_str = "",
-                    int opset = 11) {
-  std::unique_ptr<OpTester> tester;
-  if (!attributes.activation.empty()) {
-    std::string_view op;
-    if (attributes.domain == onnxruntime::kMSDomain) {
-      op = "NhwcFusedConv";
-      tester = std::make_unique<OpTester>(op, 1, attributes.domain);
-    } else if (attributes.domain == onnxruntime::kMSInternalNHWCDomain) {
-      op = "Conv";
-      tester = std::make_unique<OpTester>(op, opset, attributes.domain);
-    } else if (attributes.domain == onnxruntime::kOnnxDomain) {
-      op = "FusedConv";
-    } else {
-      ORT_THROW("Unsupported domain: ", attributes.domain);
-    }
-
-    tester->AddAttribute("activation", attributes.activation);
-
-    if (!attributes.activation_parameters.empty()) {
-      tester->AddAttribute("activation_params", attributes.activation_parameters);
-    }
-  } else {
-    tester = std::make_unique<OpTester>("Conv", opset);
-  }
-
-  tester->AddAttribute("group", attributes.group);
-  tester->AddAttribute("kernel_shape", attributes.kernel_shape);
+void TestConvOp(const ConvOpAndTestAttributes& attributes,
+                const vector<vector<float>>& inputs,
+                const vector<vector<int64_t>>& input_shapes,
+                const std::initializer_list<float>& expected_output,
+                const vector<int64_t>& expected_output_shape,
+                bool weight_is_initializer = false,
+                optional<float> epsilon = optional<float>(),
+                OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess,
+                const std::string& err_str = "",
+                int opset = 7,
+                bool exclude_cuda_nhwc = false) {
+  OpTester test("Conv", opset);
+  test.AddAttribute("group", attributes.group);
+  test.AddAttribute("kernel_shape", attributes.kernel_shape);
 
   if (!attributes.dilations.empty()) {
-    tester->AddAttribute("dilations", attributes.dilations);
+    test.AddAttribute("dilations", attributes.dilations);
   }
 
   // Only one of pads / auto_pad can be present
   if (!attributes.pads.empty()) {
-    tester->AddAttribute("pads", attributes.pads);
+    test.AddAttribute("pads", attributes.pads);
   } else {
-    tester->AddAttribute("auto_pad", attributes.auto_pad);
+    test.AddAttribute("auto_pad", attributes.auto_pad);
   }
 
   if (!attributes.strides.empty()) {
-    tester->AddAttribute("strides", attributes.strides);
+    test.AddAttribute("strides", attributes.strides);
   }
 
-  ORT_ENFORCE(inputs.size() <= 4, "Our name array is only setup to handle 4 inputs");
-  const char* szNames[] = {"X", "W", "B", "Z"};
-  tester->AddInput<MLFloat16>(szNames[0], input_shapes[0], inputs[0]);
-  tester->AddInput<MLFloat16>(szNames[1], input_shapes[1], inputs[1], weight_is_initializer);
-  if (inputs.size() >= 3)
-    tester->AddInput<MLFloat16>(szNames[2], input_shapes[2], inputs[2]);
-  if (inputs.size() >= 4)
-    tester->AddInput<MLFloat16>(szNames[3], input_shapes[3], inputs[3]);
+  ORT_ENFORCE(inputs.size() <= 3, "Our name array is only setup to handle 3 inputs");
+  const char* szNames[] = {"X", "W", "B"};
+  test.AddInput<float>(szNames[0], input_shapes[0], inputs[0]);
+  test.AddInput<float>(szNames[1], input_shapes[1], inputs[1], weight_is_initializer);
+  if (inputs.size() == 3)
+    test.AddInput<float>(szNames[2], input_shapes[2], inputs[2]);
 
-  tester->AddOutput<MLFloat16>("Y", expected_output_shape, expected_output, /*no sort*/ false, 0.002f, 0.0f);
+  test.AddOutput<float>("Y", expected_output_shape, expected_output);
+
+  if (epsilon.has_value()) {
+    test.SetOutputTolerance(*epsilon);
+  }
 
   std::unordered_set<std::string> 
excluded_providers(attributes.excluded_providers); // Disable TensorRT because weight as input is not supported excluded_providers.insert(kTensorrtExecutionProvider); - // QNN has issue with dynamic weight, auto pad with SAME_UPPER, SAME_LOWER - if (!weight_is_initializer || attributes.auto_pad == "SAME_UPPER" || attributes.auto_pad == "SAME_LOWER") { - excluded_providers.insert(kQnnExecutionProvider); - } - if (!weight_is_initializer || !attributes.activation.empty()) { - excluded_providers.insert(kCoreMLExecutionProvider); - } - tester->Run(expect_result, err_str, excluded_providers); -} -} // namespace + if (exclude_cuda_nhwc) { +#ifdef ENABLE_CUDA_NHWC_OPS + excluded_providers.insert(kCudaNHWCExecutionProvider); +#endif + } -TEST(ConvFp16Test, Conv1D_Invalid_Input_Shape) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1}, // dilations - 1, // group - vector{2}, // kernel_shape - vector{0, 0}, // pads - vector{1}, // strides - {} // excluded EPs - }; + // QNN SDK 2.10.0 has a bug that breaks support for dynamic bias inputs. + excluded_providers.insert(kQnnExecutionProvider); - vector X = vector(1, MLFloat16(1.0f)); - vector X_shape = {1, 1, 1}; - vector dummy_shape = {1, 1, 2}; - auto dummy_vals = {MLFloat16(0.0f), MLFloat16(0.0f)}; - TestConvFp16Op(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, - OpTester::ExpectResult::kExpectFailure, - "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " - "Both inferred and declared dimension have values but they differ. Inferred=0 Declared=2 Dimension=2", - -1); // use latest opset for shape inferencing errors + test.Run(expect_result, err_str, excluded_providers); } -TEST(ConvFp16Test, Conv2D_Invalid_Input_Shape) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1}, // dilations - 1, // group - vector{3, 3}, // kernel_shape - vector{0, 0, 0, 0}, // pads - vector{1, 1}, // strides - {} // excluded EPs - }; - - vector X = vector(1 * 3 * 1 * 111, MLFloat16(1.0f)); - vector X_shape = {1, 3, 1, 111}; - vector dummy_shape = {2, 2, 1, 2}; - auto dummy_vals = {MLFloat16(-0.0f), MLFloat16(0.0f), MLFloat16(-0.0f), MLFloat16(-0.0f), - MLFloat16(-0.0f), MLFloat16(0.0f), MLFloat16(-0.0f), MLFloat16(-0.0f)}; - TestConvFp16Op(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, - OpTester::ExpectResult::kExpectFailure, - "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " - "Both inferred and declared dimension have values but they differ. 
Inferred=1 Declared=2 Dimension=0", - -1); // use latest opset for shape inferencing errors -} +} // namespace -TEST(ConvFp16Test, Conv1D_1) { +// Conv +TEST(ConvTest, Conv1D_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1}, // dilations @@ -171,21 +93,22 @@ TEST(ConvFp16Test, Conv1D_1) { {} // excluded EPs }; - vector X = {MLFloat16(-0.215576172f), MLFloat16(0.469238281f), MLFloat16(0.442626953f), - MLFloat16(-0.451660156f), MLFloat16(-0.0521545410f), MLFloat16(0.290771484f), MLFloat16(0.250976562f)}; + vector X = {-0.21559301018714905f, 0.4691687822341919f, 0.4426700472831726f, -0.4517466723918915f, + -0.05216419696807861f, 0.29067182540893555f, 0.251010000705719f}; vector X_shape = {1, 1, 7}; - vector W = {MLFloat16(0.244750977f)}; + vector W = {0.24472862482070923f}; vector W_shape = {1, 1, 1}; vector Y_shape = {1, 1, 7}; - auto expected_vals = {MLFloat16(-0.0527624786f), MLFloat16(0.114846528f), MLFloat16(0.108333379f), - MLFloat16(-0.110544264f), MLFloat16(-0.0127648748f), MLFloat16(0.0711666048f), MLFloat16(0.0614267588f)}; + auto expected_vals = {-0.052761781960725784f, 0.11481902748346329f, 0.10833403468132019f, -0.11055534332990646f, + -0.012766072526574135f, 0.07113571465015411f, 0.061429332941770554f}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + // CoreML EP requires weight to be an initializer + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Conv1D_1_DefaultStridesAndDilations) { +TEST(ConvTest, Conv1D_1_DefaultStridesAndDilations) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{}, // dilations @@ -196,24 +119,23 @@ TEST(ConvFp16Test, Conv1D_1_DefaultStridesAndDilations) { {} // excluded EPs }; - vector X = {MLFloat16(-0.215576172f), MLFloat16(0.469238281f), MLFloat16(0.442626953f), - MLFloat16(-0.451660156f), MLFloat16(-0.0521545410f), MLFloat16(0.290771484f), - MLFloat16(0.250976562f)}; + vector X = {-0.21559301018714905f, 0.4691687822341919f, 0.4426700472831726f, -0.4517466723918915f, + -0.05216419696807861f, 0.29067182540893555f, 0.251010000705719f}; vector X_shape = {1, 1, 7}; - vector W = {MLFloat16(0.244750977f)}; + vector W = {0.24472862482070923f}; vector W_shape = {1, 1, 1}; vector Y_shape = {1, 1, 7}; - auto expected_vals = {MLFloat16(-0.0527624786f), MLFloat16(0.114846528f), MLFloat16(0.108333379f), - MLFloat16(-0.110544264f), MLFloat16(-0.0127648748f), MLFloat16(0.0711666048f), - MLFloat16(0.0614267588f)}; + auto expected_vals = {-0.052761781960725784f, 0.11481902748346329f, 0.10833403468132019f, -0.11055534332990646f, + -0.012766072526574135f, 0.07113571465015411f, 0.061429332941770554f}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // CoreML EP requires weight to be an initializer - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Conv1D_2) { +// Conv3 +TEST(ConvTest, Conv1D_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{2}, // dilations @@ -224,32 +146,33 @@ TEST(ConvFp16Test, Conv1D_2) { {} // excluded EPs }; - vector X = {MLFloat16(0.112f), MLFloat16(-0.0038f), MLFloat16(0.382f), MLFloat16(0.332f), - MLFloat16(0.0279f), MLFloat16(-0.0836f), 
MLFloat16(-0.41f), MLFloat16(-0.095f), - MLFloat16(-0.113f), MLFloat16(-0.0254f), MLFloat16(0.369f), MLFloat16(0.352f), - MLFloat16(-0.349f), MLFloat16(-0.22f), MLFloat16(0.231f), MLFloat16(-0.457f), - MLFloat16(-0.176f), MLFloat16(-0.0603f), MLFloat16(-0.399f), MLFloat16(-0.193f), - MLFloat16(-0.104f), MLFloat16(-0.145f), MLFloat16(-0.319f), MLFloat16(-0.153f)}; + vector X = {0.11094123125076294f, -0.0038032233715057373f, 0.3896123170852661f, 0.33259105682373047f, + 0.02794349193572998f, -0.08360505104064941f, -0.4100455045700073f, -0.09502679109573364f, + -0.11361867189407349f, -0.025495320558547974f, 0.3696536421775818f, 0.3529144525527954f, + -0.34991076588630676f, -0.22024285793304443f, 0.23085933923721313f, -0.4575521945953369f, + -0.17685726284980774f, -0.06030535697937012f, -0.3996139168739319f, -0.19385704398155212f, + -0.10454908013343811f, -0.14503943920135498f, -0.31941986083984375f, -0.15372398495674133f}; vector X_shape = {3, 1, 8}; - vector W = {MLFloat16(0.132f), MLFloat16(0.0975f), MLFloat16(0.346f), MLFloat16(0.474f)}; + vector W = {0.13225573301315308f, 0.09750443696975708f, 0.3469849228858948f, 0.4743430018424988f}; vector W_shape = {2, 1, 2}; vector Y_shape = {3, 2, 5}; - auto expected_vals = { - MLFloat16(0.0109176636f), MLFloat16(0.0520324707f), MLFloat16(0.0531311035f), MLFloat16(-0.0362854004f), - MLFloat16(-0.0540771484f), MLFloat16(0.0531005859f), MLFloat16(0.219848633f), MLFloat16(0.145385742f), - MLFloat16(-0.184692383f), MLFloat16(-0.141845703f), MLFloat16(-0.0110092163f), MLFloat16(0.0210418701f), - MLFloat16(0.0146484375f), MLFloat16(-0.0235595703f), MLFloat16(0.0304718018f), MLFloat16(-0.0535583496f), - MLFloat16(0.135864258f), MLFloat16(-0.0379028320f), MLFloat16(-0.0112762451f), MLFloat16(0.0798950195f), - MLFloat16(-0.0171508789f), MLFloat16(-0.0621032715f), MLFloat16(-0.0628051758f), MLFloat16(-0.0448303223f), - MLFloat16(-0.0421142578f), MLFloat16(-0.0834350586f), MLFloat16(-0.250000000f), MLFloat16(-0.187377930f), - MLFloat16(-0.187255859f), MLFloat16(-0.110412598f)}; - - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = {0.010817262344062328f, 0.05266154557466507f, 0.054253075271844864f, -0.03628557175397873f, + -0.05423086881637573f, 0.05262419581413269f, 0.22330480813980103f, 0.14844439923763275f, + -0.1848062425851822f, -0.14227961003780365f, -0.011078324168920517f, 0.02101614698767662f, + 0.014770962297916412f, -0.023767895996570587f, 0.03053247183561325f, -0.053894221782684326f, + 0.13591864705085754f, -0.03771348297595978f, -0.011907249689102173f, 0.08010470867156982f, + -0.01724436692893505f, -0.06235451623797417f, -0.06304522603750229f, -0.044972069561481476f, + -0.042245108634233475f, -0.08389100432395935f, -0.2509208619594574f, -0.18825212121009827f, + -0.18779152631759644f, -0.11083387583494186f}; + + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + + // CoreML EP requires weight to be an initializer + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } // Conv1 -TEST(ConvFp16Test, Conv1D_Bias) { +TEST(ConvTest, Conv1D_Bias) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{2}, // dilations @@ -260,33 +183,41 @@ TEST(ConvFp16Test, Conv1D_Bias) { {} // excluded EPs }; - vector X = {MLFloat16(0.458251953f), MLFloat16(0.387695312f), MLFloat16(-0.0541381836f), - MLFloat16(-0.301513672f), MLFloat16(0.192993164f), MLFloat16(-0.475830078f), - 
MLFloat16(0.467041016f), MLFloat16(0.407958984f), MLFloat16(0.240112305f), - MLFloat16(0.416503906f), MLFloat16(-0.0383300781f), MLFloat16(0.229736328f), - MLFloat16(0.356445312f), MLFloat16(0.128173828f), MLFloat16(0.100952148f), - MLFloat16(0.256835938f), MLFloat16(0.416992188f), MLFloat16(0.341064453f), - MLFloat16(-0.429931641f), MLFloat16(0.354492188f), MLFloat16(0.403320312f), - MLFloat16(0.101745605f), MLFloat16(0.457031250f), MLFloat16(0.0857543945f), - MLFloat16(0.380859375f), MLFloat16(0.163818359f), MLFloat16(0.123229980f), - MLFloat16(-0.199340820f), MLFloat16(0.260253906f), MLFloat16(-0.184082031f), - MLFloat16(0.311035156f), MLFloat16(0.155517578f), MLFloat16(-0.146240234f), - MLFloat16(-0.177978516f), MLFloat16(-0.0139007568f), MLFloat16(-0.0926513672f)}; + vector X = {0.4582272171974182f, 0.3877705931663513f, -0.05413919687271118f, -0.3013981878757477f, + 0.19299334287643433f, -0.4758569598197937f, 0.4670986533164978f, 0.4078403115272522f, + 0.24010121822357178f, 0.41645896434783936f, -0.038333237171173096f, 0.22969317436218262f, + 0.3565492033958435f, 0.12812334299087524f, 0.10096627473831177f, 0.25682520866394043f, + 0.41700226068496704f, 0.34114283323287964f, -0.429997980594635f, 0.3545404076576233f, + 0.40339237451553345f, 0.10174298286437988f, 0.45713120698928833f, 0.08574831485748291f, + 0.38086581230163574f, 0.16378509998321533f, 0.12321442365646362f, -0.19936135411262512f, + 0.26019394397735596f, -0.18406429886817932f, 0.3110783100128174f, 0.15553230047225952f, + -0.14629846811294556f, -0.1779327094554901f, -0.01390346884727478f, -0.09264758229255676f}; vector X_shape = {2, 2, 9}; - vector W = {MLFloat16(-0.172119141f), MLFloat16(0.323730469f)}; + vector W = {-0.17206084728240967f, 0.3236315846443176f}; vector W_shape = {1, 2, 1}; - vector B = {MLFloat16(0.378906250f)}; + vector B = {0.37892162799835205f}; vector B_shape = {1}; vector Y_shape = {2, 1, 4}; - auto expected_vals = {MLFloat16(0.378906250f), MLFloat16(0.462597132f), MLFloat16(0.493487000f), - MLFloat16(0.447991282f), MLFloat16(0.378906250f), MLFloat16(0.249894142f), - MLFloat16(0.316803873f), MLFloat16(0.327701926f)}; + auto expected_vals = {0.37892162799835205f, 0.4625728130340576f, 0.4934738576412201f, 0.44801419973373413f, + 0.37892162799835205f, 0.2499445676803589f, 0.31682088971138f, 0.32773756980895996f}; + + // For the CUDA EP: Due to CUDNN Frontend using TF32 for FP32 operations we get a higher error than using FP32 only, + // as TF32 has a 10 bit mantissa. + float epsilon = 1.1e-5f; + + // This case is not supported by cuDNN frontend, and the fallback (legacy code) requires weight to 4D tensor for NHWC. 
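+  // TestConvOp therefore adds kCudaNHWCExecutionProvider to excluded_providers when this
+  // flag is set (guarded by ENABLE_CUDA_NHWC_OPS).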
+ constexpr bool exclude_cuda_nhwc = true; - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, false, epsilon, + OpTester::ExpectResult::kExpectSuccess, "", 10, exclude_cuda_nhwc); + + // CoreML EP requires weight to be an initializer + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true, epsilon, + OpTester::ExpectResult::kExpectSuccess, "", 10, exclude_cuda_nhwc); } -TEST(ConvFp16Test, Conv2D_1) { +// Conv47 +TEST(ConvTest, Conv2D_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -297,28 +228,71 @@ TEST(ConvFp16Test, Conv2D_1) { {} // excluded EPs }; - vector X = {MLFloat16(-0.0910644531f), MLFloat16(-0.325195312f)}; + vector X = {-0.09103918075561523f, -0.32513630390167236f}; vector X_shape = {2, 1, 1, 1}; - vector W = {MLFloat16(0.431152344f), MLFloat16(-0.125610352f), MLFloat16(0.448974609f), - MLFloat16(-0.310058594f), MLFloat16(0.135253906f), MLFloat16(-0.0679321289f), - MLFloat16(0.226684570f), MLFloat16(-0.173950195f), MLFloat16(-0.312988281f), - MLFloat16(-0.315429688f), MLFloat16(0.065612793f), MLFloat16(0.265625f), - MLFloat16(0.413574219f), MLFloat16(0.312255859f), MLFloat16(-0.375976562f), - MLFloat16(-0.00571060181f), MLFloat16(0.349121094f), MLFloat16(0.450927734f)}; + vector W = {0.4312484860420227f, -0.12559029459953308f, 0.44889551401138306f, -0.3100617825984955f, + 0.13522827625274658f, -0.06791308522224426f, 0.22671669721603394f, -0.17391827702522278f, + -0.31299442052841187f, -0.31545522809028625f, 0.06560015678405762f, 0.2656586766242981f, + 0.41363757848739624f, 0.31231558322906494f, -0.376018226146698f, -0.005708813667297363f, + 0.34922850131988525f, 0.45095211267471313f}; vector W_shape = {2, 1, 3, 3}; vector Y_shape = {2, 2, 1, 2}; - auto expected_vals = {MLFloat16(-0.012316823f), MLFloat16(0.0282353163f), - MLFloat16(-0.0284354091f), MLFloat16(-0.0376619101f), - MLFloat16(-0.0439839363f), MLFloat16(0.100829601f), - MLFloat16(-0.101544142f), MLFloat16(-0.134492397f)}; + auto expected_vals = {-0.012311071157455444f, 0.02822777070105076f, -0.028432954102754593f, -0.037657227367162704f, + -0.04396762326359749f, 0.10081233829259872f, -0.10154513269662857f, -0.13448859751224518f}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // NNAPI/CoreML EP requires weight to be an initializer - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); +} + +TEST(ConvTest, Conv1D_Invalid_Input_Shape) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1}, // dilations + 1, // group + vector{2}, // kernel_shape + vector{0, 0}, // pads + vector{1}, // strides + {} // excluded EPs + }; + + vector X = vector(1, 1.0f); + vector X_shape = {1, 1, 1}; + vector dummy_shape = {1, 1, 2}; + auto dummy_vals = {0.0f, 0.0f}; + TestConvOp(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, optional(), + OpTester::ExpectResult::kExpectFailure, + "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " + "Both inferred and declared dimension have values but they differ. 
Inferred=0 Declared=2 Dimension=2", + -1); // use latest opset for shape inferencing errors +} + +TEST(ConvTest, Conv2D_Invalid_Input_Shape) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{3, 3}, // kernel_shape + vector{0, 0, 0, 0}, // pads + vector{1, 1}, // strides + {} // excluded EPs + }; + + vector X = vector(1 * 3 * 1 * 111, 1.0f); + vector X_shape = {1, 3, 1, 111}; + vector dummy_shape = {2, 2, 1, 2}; + auto dummy_vals = {-0.0f, 0.0f, -0.0f, -0.0f, + -0.0f, 0.0f, -0.0f, -0.0f}; + TestConvOp(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, optional(), + OpTester::ExpectResult::kExpectFailure, + "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " + "Both inferred and declared dimension have values but they differ. Inferred=1 Declared=2 Dimension=0", + -1); // use latest opset for shape inferencing errors } -TEST(ConvFp16Test, Conv2D_2) { +// Conv30 +TEST(ConvTest, Conv2D_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -329,54 +303,43 @@ TEST(ConvFp16Test, Conv2D_2) { {} // excluded EPs }; - vector X = { - MLFloat16(0.452392578f), MLFloat16(0.155029297f), MLFloat16(0.111999512f), - MLFloat16(-0.394287109f), MLFloat16(0.262695312f), MLFloat16(0.134155273f), - MLFloat16(-0.271728516f), MLFloat16(-0.430175781f), MLFloat16(-0.268310547f), - MLFloat16(0.389404297f), MLFloat16(-0.136352539f), MLFloat16(-0.00959014893f), - MLFloat16(-0.487792969f), MLFloat16(-0.252685547f), MLFloat16(-0.281250000f), - MLFloat16(0.404296875f), MLFloat16(0.0779418945f), MLFloat16(0.326904297f), - MLFloat16(0.131103516f), MLFloat16(-0.441650391f), MLFloat16(0.124450684f), - MLFloat16(0.367431641f), MLFloat16(0.169921875f), MLFloat16(0.200927734f), - MLFloat16(0.233398438f), MLFloat16(0.386230469f), MLFloat16(0.111145020f), - MLFloat16(0.387695312f), MLFloat16(0.208129883f), MLFloat16(-0.343017578f), - MLFloat16(-0.0292510986f), MLFloat16(-0.204833984f), MLFloat16(-0.192382812f), - MLFloat16(-0.111022949f), MLFloat16(-0.328369141f), MLFloat16(-0.0180053711f), - MLFloat16(0.361816406f), MLFloat16(-0.409423828f), MLFloat16(-0.182495117f), - MLFloat16(-0.334960938f), MLFloat16(-0.340820312f), MLFloat16(0.00649642944f), - MLFloat16(0.453857422f), MLFloat16(0.0800781250f), MLFloat16(-0.147827148f), - MLFloat16(0.0344543457f), MLFloat16(-0.333251953f), MLFloat16(0.0604858398f), - MLFloat16(0.426269531f)}; + vector X = {0.45246148109436035f, 0.15498268604278564f, 0.11199361085891724f, -0.39421093463897705f, + 0.2626858949661255f, 0.13414543867111206f, -0.27184486389160156f, -0.43028733134269714f, + -0.26825493574142456f, 0.3893144130706787f, -0.13631996512413025f, -0.009590476751327515f, + -0.48771554231643677f, -0.25256502628326416f, -0.2812897562980652f, 0.4043201804161072f, + 0.07795023918151855f, 0.326981782913208f, 0.13114392757415771f, -0.4416425824165344f, + 0.12446999549865723f, 0.36739975214004517f, 0.1698915958404541f, 0.2008744478225708f, + 0.23339951038360596f, 0.38613730669021606f, 0.11117297410964966f, 0.3877097964286804f, + 0.20812749862670898f, -0.34297940135002136f, -0.029246658086776733f, -0.20483523607254028f, + -0.19244328141212463f, -0.11104947328567505f, -0.32830488681793213f, -0.01800677180290222f, + 0.3618946671485901f, -0.40949052572250366f, -0.18248388171195984f, -0.3349453806877136f, + -0.34091079235076904f, 0.006497859954833984f, 0.4537564516067505f, 0.08006560802459717f, + -0.14788749814033508f, 0.034442365169525146f, -0.33322954177856445f, 
0.06049239635467529f, + 0.42619407176971436f}; vector X_shape = {1, 1, 7, 7}; - vector W = {MLFloat16(-0.440673828f)}; + vector W = {-0.4406261742115021f}; vector W_shape = {1, 1, 1, 1}; vector Y_shape = {1, 1, 7, 7}; - auto expected_vals = { - MLFloat16(-0.199340820f), MLFloat16(-0.0682983398f), MLFloat16(-0.0493469238f), - MLFloat16(0.173706055f), MLFloat16(-0.115783691f), MLFloat16(-0.0591125488f), - MLFloat16(0.119750977f), MLFloat16(0.189575195f), MLFloat16(0.118225098f), - MLFloat16(-0.171630859f), MLFloat16(0.0600891113f), MLFloat16(0.00422668457f), - MLFloat16(0.214965820f), MLFloat16(0.111328125f), MLFloat16(0.123962402f), - MLFloat16(-0.178222656f), MLFloat16(-0.0343322754f), MLFloat16(-0.144042969f), - MLFloat16(-0.0577697754f), MLFloat16(0.194580078f), MLFloat16(-0.0548400879f), - MLFloat16(-0.161865234f), MLFloat16(-0.0748901367f), MLFloat16(-0.0885620117f), - MLFloat16(-0.102844238f), MLFloat16(-0.170166016f), MLFloat16(-0.0489807129f), - MLFloat16(-0.170898438f), MLFloat16(-0.0917358398f), MLFloat16(0.151123047f), - MLFloat16(0.0128936768f), MLFloat16(0.0902709961f), MLFloat16(0.0847778320f), - MLFloat16(0.0489196777f), MLFloat16(0.144653320f), MLFloat16(0.00793457031f), - MLFloat16(-0.159423828f), MLFloat16(0.180419922f), MLFloat16(0.0804443359f), - MLFloat16(0.147583008f), MLFloat16(0.150146484f), MLFloat16(-0.00286293030f), - MLFloat16(-0.199951172f), MLFloat16(-0.0352783203f), MLFloat16(0.0651245117f), - MLFloat16(-0.0151824951f), MLFloat16(0.146850586f), MLFloat16(-0.0266571045f), - MLFloat16(-0.187866211f)}; - - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + auto expected_vals = {-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f, 0.17369966208934784f, + -0.11574628204107285f, -0.05910799279808998f, 0.1197819635272026f, 0.18959586322307587f, + 0.1182001456618309f, -0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f, + 0.21490024030208588f, 0.11128675937652588f, 0.12394362688064575f, -0.17815405130386353f, + -0.034346915781497955f, -0.14407673478126526f, -0.05778544768691063f, 0.19459928572177887f, + -0.05484473705291748f, -0.16188594698905945f, -0.07485868036746979f, -0.08851054310798645f, + -0.10284193605184555f, -0.17014220356941223f, -0.04898572340607643f, -0.17083507776260376f, + -0.09170642495155334f, 0.1511256992816925f, 0.012886842712759972f, 0.09025576710700989f, + 0.08479554951190948f, 0.0489313043653965f, 0.14465972781181335f, 0.007934254594147205f, + -0.15946026146411896f, 0.1804322451353073f, 0.08040717244148254f, 0.1475857049226761f, + 0.15021422505378723f, -0.0028631272725760937f, -0.19993697106838226f, -0.03527900204062462f, + 0.06516310572624207f, -0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f, + -0.18779225647449493f}; + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // NNAPI/CoreML EP requires weight to be an initializer - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Conv2D_Bias_1) { +TEST(ConvTest, Conv2D_Bias_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -387,21 +350,23 @@ TEST(ConvFp16Test, Conv2D_Bias_1) { {} // excluded EPs }; - vector X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)}; + vector X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; vector 
X_shape = {1, 1, 3, 3}; - vector W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)}; + vector W = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; vector W_shape = {2, 1, 2, 2}; vector Y_shape = {1, 2, 2, 2}; - vector B = {MLFloat16(1.0f), MLFloat16(-1.0f)}; + vector B = {1.0f, -1.0f}; vector B_shape = {2}; - auto expected_vals = {MLFloat16(13.0f), MLFloat16(17.0f), MLFloat16(25.0f), MLFloat16(29.0f), MLFloat16(11.0f), MLFloat16(15.0f), MLFloat16(23.0f), MLFloat16(27.0f)}; + auto expected_vals = {13.0f, 17.0f, 25.0f, 29.0f, 11.0f, 15.0f, 23.0f, 27.0f}; + + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + // NNAPI/CoreML EP requires weight to be an initializer + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } // Conv48 -TEST(ConvFp16Test, Conv2D_Bias_2) { +TEST(ConvTest, Conv2D_Bias_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -412,55 +377,46 @@ TEST(ConvFp16Test, Conv2D_Bias_2) { {} // excluded EPs }; - vector X = { - MLFloat16(-0.625f), MLFloat16(0.4375f), MLFloat16(0.0625f), - MLFloat16(-0.3125f), MLFloat16(-0.6875f), MLFloat16(0.375f), - MLFloat16(0.0625f), MLFloat16(-0.375f), MLFloat16(0.6875f), - MLFloat16(0.3125f), MLFloat16(-0.0625f), MLFloat16(-0.4375f), - MLFloat16(0.625f), MLFloat16(0.25f), MLFloat16(-0.125f), - MLFloat16(-0.5f), MLFloat16(0.5625f), MLFloat16(0.1875f), - MLFloat16(-0.1875f), MLFloat16(-0.5625f), MLFloat16(0.5f), - MLFloat16(0.125f), MLFloat16(-0.25f), MLFloat16(-0.625f), - MLFloat16(0.4375f), MLFloat16(0.0625f), MLFloat16(-0.3125f), - MLFloat16(-0.6875f), MLFloat16(0.375f), MLFloat16(0.25f), - MLFloat16(-0.375f), MLFloat16(0.6875f), MLFloat16(0.3125f), - MLFloat16(-0.0625f), MLFloat16(-0.4375f), MLFloat16(0.625f), - MLFloat16(0.25f), MLFloat16(-0.125f), MLFloat16(-0.5f), - MLFloat16(0.5625f), MLFloat16(0.1875f), MLFloat16(-0.1875f), - MLFloat16(-0.5625f), MLFloat16(0.5f), MLFloat16(0.125f), - MLFloat16(-0.25f), MLFloat16(-0.625f), MLFloat16(0.4375f), - MLFloat16(0.0625f), MLFloat16(-0.3125f), MLFloat16(-0.6875f), - MLFloat16(0.375f), MLFloat16(0.125f), MLFloat16(-0.375f), - MLFloat16(0.6875f), MLFloat16(0.3125f), MLFloat16(-0.0625f), - MLFloat16(-0.4375f), MLFloat16(0.625f), MLFloat16(0.25f), - MLFloat16(-0.125f), MLFloat16(-0.5f), MLFloat16(0.5625f), - MLFloat16(0.1875f), MLFloat16(-0.1875f), MLFloat16(-0.5625f), - MLFloat16(0.5f), MLFloat16(0.125f), MLFloat16(-0.25f), - MLFloat16(-0.625f), MLFloat16(0.4375f), MLFloat16(0.0625f)}; + vector X = {-0.22904816269874573f, -0.20278319716453552f, -0.4723144471645355f, 0.027880489826202393f, + 0.2685856819152832f, -0.19361668825149536f, -0.39857280254364014f, 0.40285515785217285f, + 0.20966708660125732f, -0.39234158396720886f, -0.07502302527427673f, 0.4662899374961853f, + -0.2567148208618164f, -0.1186269223690033f, -0.1897754967212677f, -0.3967694342136383f, + -0.4268943667411804f, -0.344584584236145f, -0.4483465552330017f, -0.41608482599258423f, + -0.23649904131889343f, -0.4195239543914795f, 0.3277903199195862f, -0.11628741025924683f, + 0.2873995900154114f, 0.21717703342437744f, -0.26514798402786255f, 0.08272713422775269f, + 0.0050997138023376465f, -0.41409194469451904f, 0.2826550006866455f, 0.4891064763069153f, + 
-0.1522480845451355f, -0.2554396986961365f, 0.04099029302597046f, -0.35793858766555786f, + 0.2557554841041565f, 0.41162675619125366f, -0.06953108310699463f, 0.029517710208892822f, + 0.32956594228744507f, 0.4615175127983093f, -0.3216847777366638f, 0.15545696020126343f, + -0.3779126703739166f, -0.01712372899055481f, 0.07461833953857422f, 0.38875824213027954f, + 0.1980893611907959f, -0.19913813471794128f, -0.011296629905700684f, 0.30053526163101196f, + 0.4461088180541992f, 0.025034189224243164f, -0.3370230793952942f, -0.21012544631958008f, + -0.41627752780914307f, -0.43801137804985046f, 0.13566172122955322f, -0.47898364067077637f, + -0.45526939630508423f, -0.3007912039756775f, 0.06994932889938354f, -0.0749855637550354f, + -0.22754916548728943f, -0.469131737947464f, 0.08644282817840576f, 0.06157493591308594f, + -0.3920745849609375f, 0.458797812461853f, 0.18890488147735596f, 0.40145808458328247f}; vector X_shape = {1, 2, 6, 6}; - vector W = { - MLFloat16(-0.3125f), MLFloat16(-0.6875f), MLFloat16(0.375f), MLFloat16(0.025f), - MLFloat16(-0.375f), MLFloat16(0.6875f), MLFloat16(0.3125f), MLFloat16(-0.0625f), - MLFloat16(-0.4375f), MLFloat16(0.625f), MLFloat16(0.25f), MLFloat16(-0.125f), - MLFloat16(-0.5f), MLFloat16(0.5625f), MLFloat16(0.1875f), MLFloat16(-0.1875f), - MLFloat16(-0.5625f), MLFloat16(0.5f), MLFloat16(0.125f), MLFloat16(-0.25f), - MLFloat16(-0.625f), MLFloat16(0.4375f), MLFloat16(0.0625f), MLFloat16(-0.3125f), - MLFloat16(-0.6875f), MLFloat16(0.375f), MLFloat16(-0.125f), MLFloat16(-0.375f), - MLFloat16(0.6875f), MLFloat16(0.3125f), MLFloat16(-0.0625f), MLFloat16(-0.4375f)}; + vector W = {-0.48007914423942566f, -0.21048793196678162f, 0.2505034804344177f, 0.1610567569732666f, + -0.24951639771461487f, 0.1918455958366394f, 0.44247758388519287f, 0.06943017244338989f, + -0.10510382056236267f, -0.41663575172424316f, -0.3053555488586426f, -0.19126328825950623f, + -0.42332321405410767f, 0.498790979385376f, 0.081226646900177f, -0.21777048707008362f, + 0.46603143215179443f, -0.43488776683807373f, -0.3080252408981323f, -0.3844330906867981f, + -0.17214277386665344f, -0.3650006353855133f, 0.21724021434783936f, 0.1636529564857483f, + -0.22924479842185974f, 0.044009625911712646f, 0.274614155292511f, -0.06811442971229553f, + 0.450619637966156f, 0.4611729383468628f, 0.20782196521759033f, -0.3136714696884155f}; vector W_shape = {1, 2, 4, 4}; - vector B = {MLFloat16(-0.8125f)}; + vector B = {-0.40378910303115845f}; vector B_shape = {1}; vector Y_shape = {1, 1, 4, 2}; - auto expected_vals = { - MLFloat16(-0.83203125f), MLFloat16(-1.40625f), MLFloat16(-0.595312476f), MLFloat16(-1.93906248f), - MLFloat16(-0.896875024f), MLFloat16(-1.53750002f), MLFloat16(-0.904687524f), MLFloat16(-1.65937495f)}; + auto expected_vals = {-0.3419531583786011f, -0.6116723418235779f, -0.39677709341049194f, -0.7316848039627075f, + -0.5647197365760803f, 0.02788025140762329f, -0.30450713634490967f, -0.6786775588989258f}; - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Conv2D_AutoPad1) { +TEST(ConvTest, Conv2D_AutoPad1) { ConvOpAndTestAttributes attrs = { "SAME_UPPER", // auto_pad vector{1, 1}, // dilations @@ -471,26 +427,26 @@ TEST(ConvFp16Test, Conv2D_AutoPad1) { {} // excluded EPs }; - vector X = 
vector(25, MLFloat16(1.0f)); + vector X = vector(25, 1.0f); vector X_shape = {1, 1, 5, 5}; - vector W = {MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), - MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), - MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f)}; + vector W = {0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f}; vector W_shape = {1, 1, 3, 3}; vector Y_shape = {1, 1, 5, 5}; - auto expected_vals = {MLFloat16(24.0f), MLFloat16(33.0f), MLFloat16(33.0f), MLFloat16(33.0f), MLFloat16(20.0f), - MLFloat16(27.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(21.0f), - MLFloat16(27.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(21.0f), - MLFloat16(27.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(21.0f), - MLFloat16(12.0f), MLFloat16(15.0f), MLFloat16(15.0f), MLFloat16(15.0f), MLFloat16(8.0f)}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + auto expected_vals = {24.0f, 33.0f, 33.0f, 33.0f, 20.0f, + 27.0f, 36.0f, 36.0f, 36.0f, 21.0f, + 27.0f, 36.0f, 36.0f, 36.0f, 21.0f, + 27.0f, 36.0f, 36.0f, 36.0f, 21.0f, + 12.0f, 15.0f, 15.0f, 15.0f, 8.0f}; + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // NNAPI/CoreML EP requires weight to be an initializer - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Conv2D_AutoPad2) { +TEST(ConvTest, Conv2D_AutoPad2) { ConvOpAndTestAttributes attrs = { "SAME_LOWER", // auto_pad vector{1, 1}, // dilations @@ -501,29 +457,31 @@ TEST(ConvFp16Test, Conv2D_AutoPad2) { {} // excluded EPs }; - vector X = {MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f)}; + vector X = {1.0f, 0.0f, 1.0f, 0.0f, 1.0f, + 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, + 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, + 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, + 1.0f, 0.0f, 1.0f, 0.0f, 1.0f}; vector X_shape = {1, 1, 5, 5}; - vector W = {MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), - MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), - MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f)}; + vector W = {0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f}; vector W_shape = {1, 1, 3, 3}; vector Y_shape = {1, 1, 5, 5}; - auto expected_vals = {MLFloat16(11.0f), MLFloat16(22.0f), MLFloat16(11.0f), MLFloat16(22.0f), MLFloat16(11.0f), - MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), - MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), - MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), - MLFloat16(5.0f), MLFloat16(10.0f), MLFloat16(5.0f), MLFloat16(10.0f), MLFloat16(5.0f)}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = {11.0f, 22.0f, 11.0f, 22.0f, 11.0f, + 12.0f, 24.0f, 12.0f, 24.0f, 12.0f, + 12.0f, 24.0f, 12.0f, 24.0f, 12.0f, + 12.0f, 24.0f, 12.0f, 24.0f, 12.0f, + 5.0f, 10.0f, 5.0f, 10.0f, 5.0f}; + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, 
expected_vals, Y_shape); + + // NNAPI/CoreML EP requires weight to be an initializer + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Conv3D_1) { +// Conv10 +TEST(ConvTest, Conv3D_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1, 1}, // dilations @@ -534,35 +492,33 @@ TEST(ConvFp16Test, Conv3D_1) { {} // excluded EPs }; - vector X = { - MLFloat16(-0.433349609f), MLFloat16(-0.483886719f), MLFloat16(-0.309570312f), - MLFloat16(0.160766602f), MLFloat16(-0.466796875f), MLFloat16(0.465820312f), - MLFloat16(-0.370605469f), MLFloat16(0.406005859f), MLFloat16(-0.0354919434f), - MLFloat16(-0.312500000f), MLFloat16(0.426757812f), MLFloat16(0.398437500f), - MLFloat16(-0.390625000f), MLFloat16(0.259033203f), MLFloat16(-0.206420898f), - MLFloat16(0.138183594f), MLFloat16(-0.201538086f), MLFloat16(0.100280762f), - MLFloat16(-0.241333008f), MLFloat16(0.123107910f), MLFloat16(0.0327453613f), - MLFloat16(0.296142578f), MLFloat16(-0.231201172f), MLFloat16(0.334472656f), - MLFloat16(0.0256805420f), MLFloat16(0.245849609f), MLFloat16(0.117248535f)}; + vector X = {-0.43337246775627136f, -0.48385289311408997f, -0.30954962968826294f, + 0.16074687242507935f, -0.46670910716056824f, 0.46576786041259766f, + -0.37056273221969604f, 0.40604978799819946f, -0.035478413105010986f, + -0.3125576674938202f, 0.42677170038223267f, 0.39851123094558716f, + -0.3906140625476837f, 0.2590462565422058f, -0.20646807551383972f, + 0.1382436752319336f, -0.20149192214012146f, 0.10030072927474976f, + -0.2413364052772522f, 0.1231224536895752f, 0.032734215259552f, + 0.29610633850097656f, -0.23117440938949585f, 0.3345826268196106f, + 0.02567422389984131f, 0.24579226970672607f, 0.11724984645843506f}; vector X_shape = {1, 1, 3, 3, 3}; - vector W = {MLFloat16(-0.442138672f)}; + vector W = {-0.44214117527008057f}; vector W_shape = {1, 1, 1, 1, 1}; vector Y_shape = {1, 1, 3, 3, 3}; - auto expected_vals = { - MLFloat16(0.191600621f), MLFloat16(0.213945031f), MLFloat16(0.136873007f), - MLFloat16(-0.0710811317f), MLFloat16(0.206388950f), MLFloat16(-0.205957174f), - MLFloat16(0.163859010f), MLFloat16(-0.179510891f), MLFloat16(0.0156923607f), - MLFloat16(0.138168335f), MLFloat16(-0.188686132f), MLFloat16(-0.176164627f), - MLFloat16(0.172710419f), MLFloat16(-0.114528596f), MLFloat16(0.0912666619f), - MLFloat16(-0.0610963106f), MLFloat16(0.0891077816f), MLFloat16(-0.0443380028f), - MLFloat16(0.106702656f), MLFloat16(-0.0544307679f), MLFloat16(-0.0144779906f), - MLFloat16(-0.130936086f), MLFloat16(0.102222979f), MLFloat16(-0.147883296f), - MLFloat16(-0.0113543607f), MLFloat16(-0.108699620f), MLFloat16(-0.0518401116f)}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = {0.19161181151866913f, 0.21393129229545593f, 0.13686463236808777f, + -0.07107280939817429f, 0.20635131001472473f, -0.20593515038490295f, + 0.16384103894233704f, -0.17953133583068848f, 0.01568646728992462f, + 0.13819462060928345f, -0.1886933445930481f, -0.17619822919368744f, + 0.17270655930042267f, -0.11453501880168915f, 0.09128803759813309f, + -0.06112322211265564f, 0.08908787369728088f, -0.04434708133339882f, + 0.10670476406812668f, -0.054437506943941116f, -0.014473143965005875f, + -0.13092079758644104f, 0.10221172869205475f, -0.1479327529668808f, + -0.011351631954312325f, -0.10867488384246826f, -0.05184098333120346f}; + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); 
} -TEST(ConvFp16Test, Conv3D_2) { +// Conv22 +TEST(ConvTest, Conv3D_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1, 1}, // dilations @@ -573,40 +529,39 @@ TEST(ConvFp16Test, Conv3D_2) { {} // excluded EPs }; - vector X = { - MLFloat16(0.0107727051f), MLFloat16(-0.437988281f), MLFloat16(0.455322266f), MLFloat16(-0.286621094f), - MLFloat16(0.456787109f), MLFloat16(-0.0320434570f), MLFloat16(0.422851562f), MLFloat16(-0.187255859f), - MLFloat16(-0.458496094f), MLFloat16(0.0420532227f), MLFloat16(-0.133300781f), MLFloat16(-0.253662109f), - MLFloat16(-0.238403320f), MLFloat16(0.122131348f), MLFloat16(-0.177856445f), MLFloat16(0.189208984f), - MLFloat16(0.379638672f), MLFloat16(-0.0339965820f), MLFloat16(0.127319336f), MLFloat16(-0.0402832031f), - MLFloat16(0.464355469f), MLFloat16(-0.226928711f), MLFloat16(0.173950195f), MLFloat16(-0.301513672f), - MLFloat16(-0.404296875f), MLFloat16(-0.332031250f), MLFloat16(0.0465393066f), MLFloat16(-0.494873047f), - MLFloat16(0.0755004883f), MLFloat16(0.117309570f), MLFloat16(0.470458984f), MLFloat16(0.482421875f), - MLFloat16(-0.377441406f), MLFloat16(-0.0564880371f), MLFloat16(-0.107910156f), MLFloat16(0.0434875488f), - MLFloat16(0.244750977f), MLFloat16(-0.409912109f), MLFloat16(0.0616149902f), MLFloat16(0.229736328f), - MLFloat16(0.278808594f), MLFloat16(0.0814819336f), MLFloat16(0.245361328f), MLFloat16(0.0825195312f), - MLFloat16(-0.147216797f), MLFloat16(-0.430175781f), MLFloat16(0.0271759033f), MLFloat16(0.360595703f), - MLFloat16(0.249511719f), MLFloat16(-0.225097656f), MLFloat16(-0.362792969f), MLFloat16(-0.476806641f), - MLFloat16(0.112731934f), MLFloat16(0.497802734f), MLFloat16(0.268554688f), MLFloat16(0.0255279541f), - MLFloat16(-0.303710938f), MLFloat16(0.411376953f), MLFloat16(0.361572266f), MLFloat16(0.00883483887f), - MLFloat16(-0.0795898438f), MLFloat16(0.360107422f), MLFloat16(0.173217773f), MLFloat16(-0.0120086670f)}; + vector X = {0.010772407054901123f, -0.43806642293930054f, 0.455391526222229f, -0.28657248616218567f, + 0.45676887035369873f, -0.0320507287979126f, 0.4229400157928467f, -0.18730869889259338f, + -0.45851585268974304f, 0.042054951190948486f, -0.13332295417785645f, -0.25374430418014526f, + -0.23845627903938293f, 0.12214112281799316f, -0.1778157651424408f, 0.1891845464706421f, + 0.37962496280670166f, -0.033982306718826294f, 0.12737131118774414f, -0.040284961462020874f, + 0.46427029371261597f, -0.22687292098999023f, 0.17398333549499512f, -0.3014046251773834f, + -0.4043419063091278f, -0.33206477761268616f, 0.04655301570892334f, -0.4947906732559204f, + 0.0755157470703125f, 0.1173025369644165f, 0.47043120861053467f, 0.4824737310409546f, + -0.37734976410865784f, -0.056491583585739136f, -0.10790631175041199f, 0.043476223945617676f, + 0.24469023942947388f, -0.4100031852722168f, 0.0616222620010376f, 0.2296960949897766f, + 0.27883386611938477f, 0.08150351047515869f, 0.2453773021697998f, 0.08250969648361206f, + -0.1471814215183258f, -0.43011274933815f, 0.027180075645446777f, 0.3605625033378601f, + 0.24954384565353394f, -0.22505927085876465f, -0.36272895336151123f, -0.47674262523651123f, + 0.11275297403335571f, 0.49773406982421875f, 0.2686365246772766f, 0.025525271892547607f, + -0.3037869930267334f, 0.41126757860183716f, 0.36149072647094727f, 0.00883406400680542f, + -0.07959523797035217f, 0.3601323366165161f, 0.17322391271591187f, -0.012007325887680054f}; vector X_shape = {1, 1, 4, 4, 4}; - vector W = {MLFloat16(0.328125f)}; + vector W = {0.32824617624282837f}; vector W_shape = {1, 1, 1, 1, 1}; vector Y_shape = {1, 1, 
4, 4, 4}; - auto expected_vals = {MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), - MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), - MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(0.00353479385f), MLFloat16(0.149402618f), MLFloat16(), - MLFloat16(), MLFloat16(-0.150444031f), MLFloat16(-0.0437393188f), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), - MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(-0.123847961f), MLFloat16(-0.03540802f), MLFloat16(), - MLFloat16(), MLFloat16(0.0914840698f), MLFloat16(0.0805091858f), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), - MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), - MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16()}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0035360013134777546f, 0.14948052167892456f, 0.0f, + 0.0f, -0.15050607919692993f, -0.043762750923633575f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -0.12386361509561539f, -0.03541983291506767f, 0.0f, + 0.0f, 0.09152615070343018f, 0.08054415881633759f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); } -TEST(ConvFp16Test, Conv3D_Bias) { +// Conv23 +TEST(ConvTest, Conv3D_Bias) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{2, 2, 2}, // dilations @@ -617,96 +572,84 @@ TEST(ConvFp16Test, Conv3D_Bias) { {} // excluded EPs }; - vector X = { - MLFloat16(0.468017578f), MLFloat16(-0.461425781f), MLFloat16(0.335205078f), MLFloat16(-0.401123047f), - MLFloat16(0.417236328f), MLFloat16(-0.0481262207f), MLFloat16(0.204101562f), MLFloat16(0.0318908691f), - MLFloat16(-0.0477905273f), MLFloat16(-0.0795288086f), MLFloat16(0.498779297f), MLFloat16(0.350585938f), - MLFloat16(0.480712891f), MLFloat16(0.269775391f), MLFloat16(-0.246337891f), MLFloat16(0.190429688f), - MLFloat16(-0.118286133f), MLFloat16(-0.257568359f), MLFloat16(-0.339355469f), MLFloat16(-0.258056641f), - MLFloat16(-0.0828247070f), MLFloat16(0.351318359f), MLFloat16(-0.291259766f), MLFloat16(-0.433593750f), - MLFloat16(-0.134277344f), MLFloat16(0.440429688f), MLFloat16(0.0530700684f), MLFloat16(-0.350097656f), - MLFloat16(-0.284667969f), MLFloat16(-0.442138672f), MLFloat16(-0.0741577148f), MLFloat16(-0.109191895f), - MLFloat16(0.284423828f), MLFloat16(0.349853516f), MLFloat16(-0.193115234f), MLFloat16(0.326171875f), - MLFloat16(0.488037109f), MLFloat16(0.0557556152f), MLFloat16(-0.464599609f), MLFloat16(-0.0252380371f), - MLFloat16(-0.187866211f), MLFloat16(-0.147216797f), MLFloat16(0.207641602f), MLFloat16(0.471679688f), - MLFloat16(-0.0556640625f), MLFloat16(-0.498779297f), MLFloat16(0.227416992f), MLFloat16(0.458984375f), - MLFloat16(-0.472412109f), MLFloat16(-0.435791016f), MLFloat16(0.284179688f), MLFloat16(-0.270263672f), - MLFloat16(0.342285156f), MLFloat16(0.335693359f), MLFloat16(-0.194824219f), MLFloat16(-0.276855469f), - MLFloat16(-0.423828125f), MLFloat16(-0.438476562f), 
MLFloat16(0.437255859f), MLFloat16(0.306396484f), - MLFloat16(0.457031250f), MLFloat16(0.0529174805f), MLFloat16(-0.0236206055f), MLFloat16(-0.186035156f), - MLFloat16(0.0866699219f), MLFloat16(0.325439453f), MLFloat16(0.184570312f), MLFloat16(-0.198486328f), - MLFloat16(-0.275390625f), MLFloat16(0.320068359f), MLFloat16(-0.348388672f), MLFloat16(0.0999755859f), - MLFloat16(-0.113769531f), MLFloat16(0.212280273f), MLFloat16(-0.0231475830f), MLFloat16(0.167114258f), - MLFloat16(0.223144531f), MLFloat16(0.0361022949f), MLFloat16(-0.158691406f), MLFloat16(0.0599975586f), - MLFloat16(-0.0395202637f), MLFloat16(-0.484130859f), MLFloat16(0.329101562f), MLFloat16(-0.231201172f), - MLFloat16(0.394531250f), MLFloat16(-0.355468750f), MLFloat16(-0.170288086f), MLFloat16(-0.0550842285f), - MLFloat16(0.158569336f), MLFloat16(-0.418457031f), MLFloat16(-0.247436523f), MLFloat16(0.0360412598f), - MLFloat16(-0.283691406f), MLFloat16(0.460205078f), MLFloat16(0.291015625f), MLFloat16(-0.199340820f), - MLFloat16(0.380859375f), MLFloat16(-0.138427734f), MLFloat16(-0.238403320f), MLFloat16(-0.190673828f), - MLFloat16(-0.110595703f), MLFloat16(-0.0871582031f), MLFloat16(0.244506836f), MLFloat16(-0.147216797f), - MLFloat16(0.143676758f), MLFloat16(0.395507812f), MLFloat16(-0.125366211f), MLFloat16(0.115905762f), - MLFloat16(0.459716797f), MLFloat16(-0.300048828f), MLFloat16(-0.465820312f), MLFloat16(-0.339599609f), - MLFloat16(-0.267089844f), MLFloat16(0.361083984f), MLFloat16(-0.114257812f), MLFloat16(-0.0838012695f), - MLFloat16(-0.318115234f), MLFloat16(0.145141602f), MLFloat16(0.315673828f), MLFloat16(0.331787109f), - MLFloat16(-0.255859375f), MLFloat16(0.118896484f), MLFloat16(0.128295898f), MLFloat16(-0.331054688f), - MLFloat16(0.254882812f), MLFloat16(-0.467529297f), MLFloat16(-0.119812012f), MLFloat16(0.183471680f)}; + vector X = {0.46796226501464844f, -0.4613912105560303f, 0.33512794971466064f, -0.4010460674762726f, + 0.41722816228866577f, -0.048133403062820435f, 0.20415884256362915f, 0.03189706802368164f, + -0.04779183864593506f, -0.0795503556728363f, 0.4987630844116211f, 0.3506373167037964f, + 0.48065757751464844f, 0.269855260848999f, -0.2463444471359253f, 0.19044137001037598f, + -0.11830493807792664f, -0.2576887905597687f, -0.33940935134887695f, -0.257951021194458f, + -0.08279827237129211f, 0.3513314127922058f, -0.29122066497802734f, -0.43358397483825684f, + -0.13429927825927734f, 0.44032156467437744f, 0.05308258533477783f, -0.3499870300292969f, + -0.28474611043930054f, -0.44209951162338257f, -0.07418054342269897f, -0.10919415950775146f, + 0.2845439314842224f, 0.3498746156692505f, -0.19313520193099976f, 0.32609254121780396f, + 0.4880145788192749f, 0.05574071407318115f, -0.46457427740097046f, -0.02524462342262268f, + -0.18780940771102905f, -0.14720159769058228f, 0.207585871219635f, 0.47157740592956543f, + -0.05567386746406555f, -0.49871665239334106f, 0.2274145483970642f, 0.4589425325393677f, + -0.4725189805030823f, -0.4358765780925751f, 0.2841453552246094f, -0.27037882804870605f, + 0.34227508306503296f, 0.33575427532196045f, -0.19485199451446533f, -0.27679920196533203f, + -0.4238079786300659f, -0.4385119676589966f, 0.43724071979522705f, 0.3065117597579956f, + 0.45696544647216797f, 0.05291992425918579f, -0.023618370294570923f, -0.1860884726047516f, + 0.08669537305831909f, 0.32541000843048096f, 0.1846179962158203f, -0.1984834372997284f, + -0.2754465937614441f, 0.32004624605178833f, -0.34846532344818115f, 0.0999596118927002f, + -0.11374691128730774f, 0.21225297451019287f, -0.02315312623977661f, 
0.1671370267868042f, + 0.22319108247756958f, 0.03609824180603027f, -0.1587022840976715f, 0.059984564781188965f, + -0.03951650857925415f, -0.4841443598270416f, 0.32919085025787354f, -0.23115816712379456f, + 0.39441078901290894f, -0.3554944396018982f, -0.17022761702537537f, -0.055081307888031006f, + 0.15856128931045532f, -0.4183449149131775f, -0.2474445104598999f, 0.03603637218475342f, + -0.2836887538433075f, 0.4602506160736084f, 0.29092925786972046f, -0.199321448802948f, + 0.380856454372406f, -0.13847029209136963f, -0.238397479057312f, -0.1907123327255249f, + -0.11061936616897583f, -0.08717870712280273f, 0.24449139833450317f, -0.14727482199668884f, + 0.1437196135520935f, 0.3955056071281433f, -0.12538021802902222f, 0.11590522527694702f, + 0.4598066806793213f, -0.30005723237991333f, -0.46578651666641235f, -0.33955082297325134f, + -0.2671887278556824f, 0.3611910939216614f, -0.11423084139823914f, -0.08382436633110046f, + -0.31819307804107666f, 0.14515334367752075f, 0.3157258629798889f, 0.33179205656051636f, + -0.2558857202529907f, 0.11888682842254639f, 0.12824326753616333f, -0.33106181025505066f, + 0.2549159526824951f, -0.46760573983192444f, -0.11983257532119751f, 0.1834418773651123f}; vector X_shape = {2, 1, 4, 4, 4}; - vector W = { - MLFloat16(0.388183594f), MLFloat16(-0.163696289f), - MLFloat16(-0.428710938f), MLFloat16(0.427734375f), - MLFloat16(0.215209961f), MLFloat16(0.00791168213f), - MLFloat16(0.338867188f), MLFloat16(0.218383789f), - MLFloat16(0.341064453f), MLFloat16(-0.170410156f), - MLFloat16(-0.0135726929f), MLFloat16(-0.267822266f), - MLFloat16(-0.348632812f), MLFloat16(-0.267333984f), - MLFloat16(-0.366943359f), MLFloat16(0.373046875f)}; + vector W = {0.388077974319458f, -0.16366064548492432f, -0.42871910333633423f, 0.4276432394981384f, + 0.21517693996429443f, 0.007908165454864502f, 0.33897721767425537f, 0.21843165159225464f, + 0.34095364809036255f, -0.17043980956077576f, -0.013571739196777344f, -0.26793742179870605f, + -0.34863436222076416f, -0.2672275900840759f, -0.36691007018089294f, 0.37296557426452637f}; vector W_shape = {2, 1, 2, 2, 2}; - vector B = {MLFloat16(0.430908203f), MLFloat16(-0.456298828f)}; + vector B = {0.4310183525085449f, -0.4564093053340912f}; vector B_shape = {2}; vector Y_shape = {2, 2, 3, 3, 3}; - auto expected_vals = { - MLFloat16(0.533115625f), MLFloat16(0.662707329f), MLFloat16(0.544498205f), - MLFloat16(0.424174339f), MLFloat16(0.627012968f), MLFloat16(0.672067642f), - MLFloat16(0.430530101f), MLFloat16(0.424569398f), MLFloat16(0.538250446f), - MLFloat16(0.693208933f), MLFloat16(0.427851349f), MLFloat16(0.221761703f), - MLFloat16(0.295077145f), MLFloat16(0.832913339f), MLFloat16(0.375999779f), - MLFloat16(0.437245011f), MLFloat16(0.291920483f), MLFloat16(0.669212699f), - MLFloat16(0.552566051f), MLFloat16(0.226370573f), MLFloat16(0.513698816f), - MLFloat16(0.303992242f), MLFloat16(0.742284894f), MLFloat16(0.266925812f), - MLFloat16(0.461661220f), MLFloat16(0.323991477f), MLFloat16(0.511511266f), - MLFloat16(-0.281706333f), MLFloat16(-0.502987564f), MLFloat16(-0.579300106f), - MLFloat16(-0.599243939f), MLFloat16(-0.505472362f), MLFloat16(-0.756186068f), - MLFloat16(-0.443522811f), MLFloat16(-0.572978139f), MLFloat16(-0.630189657f), - MLFloat16(-0.475540936f), MLFloat16(-0.728834927f), MLFloat16(-0.389986098f), - MLFloat16(-0.669373453f), MLFloat16(-0.387869477f), MLFloat16(-0.357608467f), - MLFloat16(-0.397931814f), MLFloat16(-0.547608852f), MLFloat16(-0.358573616f), - MLFloat16(-0.532473862f), MLFloat16(-0.408438683f), MLFloat16(-0.453677744f), - 
MLFloat16(-0.454452783f), MLFloat16(-0.379444361f), MLFloat16(-0.524981856f), - MLFloat16(-0.424284518f), MLFloat16(-0.555757523f), MLFloat16(-0.385479659f), - MLFloat16(0.449835509f), MLFloat16(0.500584960f), MLFloat16(0.493453026f), - MLFloat16(0.406748474f), MLFloat16(0.407412887f), MLFloat16(0.462785602f), - MLFloat16(0.430008084f), MLFloat16(0.406240731f), MLFloat16(0.425926626f), - MLFloat16(0.551153421f), MLFloat16(0.549696267f), MLFloat16(0.270993829f), - MLFloat16(0.402447432f), MLFloat16(0.574599743f), MLFloat16(0.418689728f), - MLFloat16(0.450668573f), MLFloat16(0.420462728f), MLFloat16(0.394942641f), - MLFloat16(0.593814850f), MLFloat16(0.165656328f), MLFloat16(0.533114314f), - MLFloat16(0.430018425f), MLFloat16(0.502558053f), MLFloat16(0.392109811f), - MLFloat16(0.407388866f), MLFloat16(0.507203162f), MLFloat16(0.382243097f), - MLFloat16(-0.423966885f), MLFloat16(-0.419248402f), MLFloat16(-0.524025679f), - MLFloat16(-0.521910012f), MLFloat16(-0.502744913f), MLFloat16(-0.512152255f), - MLFloat16(-0.425884366f), MLFloat16(-0.410446912f), MLFloat16(-0.448228836f), - MLFloat16(-0.337432563f), MLFloat16(-0.735596657f), MLFloat16(-0.371323436f), - MLFloat16(-0.488816738f), MLFloat16(-0.618983328f), MLFloat16(-0.263916761f), - MLFloat16(-0.475321025f), MLFloat16(-0.507732749f), MLFloat16(-0.420486867f), - MLFloat16(-0.558301449f), MLFloat16(-0.397618413f), MLFloat16(-0.453063041f), - MLFloat16(-0.559680939f), MLFloat16(-0.254149109f), MLFloat16(-0.535908163f), - MLFloat16(-0.480782807f), MLFloat16(-0.385932118f), MLFloat16(-0.499056786f)}; - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + auto expected_vals = {0.5332361459732056f, 0.6628494262695312f, 0.544619083404541f, 0.4242798388004303f, + 0.6271085739135742f, 0.6721994876861572f, 0.43064039945602417f, 0.4246789515018463f, + 0.53834068775177f, 0.6932926177978516f, 0.42797625064849854f, 0.2218741625547409f, + 0.29522019624710083f, 0.8329390287399292f, 0.37605351209640503f, 0.43735477328300476f, + 0.2920728623867035f, 0.6692450046539307f, 0.5527016520500183f, 0.22643595933914185f, + 0.5138190984725952f, 0.3041342794895172f, 0.7423423528671265f, 0.26707080006599426f, + 0.4617553651332855f, 0.32416003942489624f, 0.511577844619751f, -0.28187549114227295f, + -0.5031181573867798f, -0.5793710947036743f, -0.5992864370346069f, -0.5055556893348694f, + -0.7562476396560669f, -0.44363799691200256f, -0.5730307102203369f, -0.6302952766418457f, + -0.4756688177585602f, -0.728988528251648f, -0.3900943398475647f, -0.6694478988647461f, + -0.38822290301322937f, -0.35774707794189453f, -0.39807581901550293f, -0.547709047794342f, + -0.35872578620910645f, -0.5326492786407471f, -0.40852290391921997f, -0.4537881314754486f, + -0.4545857608318329f, -0.379546195268631f, -0.5250767469406128f, -0.42439910769462585f, + -0.5558245182037354f, -0.38563215732574463f, 0.44995537400245667f, 0.5007325410842896f, + 0.49359965324401855f, 0.40685802698135376f, 0.407518208026886f, 0.4628955125808716f, + 0.4301188290119171f, 0.40635955333709717f, 0.4260363280773163f, 0.55128413438797f, + 0.5498291254043579f, 0.27105778455734253f, 0.40259143710136414f, 0.5747092962265015f, + 0.4187920391559601f, 0.4507707953453064f, 0.420598566532135f, 0.3950541913509369f, + 0.593889057636261f, 0.16578882932662964f, 0.5332239270210266f, 0.43014785647392273f, + 0.50260329246521f, 0.39225444197654724f, 0.4074971079826355f, 0.5073125958442688f, + 
0.3823610544204712f, -0.4240749180316925f, -0.41936254501342773f, -0.5241475105285645f,
+ -0.5220003724098206f, -0.502869725227356f, -0.5122783780097961f, -0.4260129928588867f,
+ -0.4105660617351532f, -0.4483373165130615f, -0.33759188652038574f, -0.735706090927124f,
+ -0.3714444637298584f, -0.4888814687728882f, -0.6191370487213135f, -0.2640320658683777f,
+ -0.47542816400527954f, -0.5078460574150085f, -0.4205915927886963f, -0.5584549903869629f,
+ -0.39770257472991943f, -0.45317384600639343f, -0.5598302483558655f, -0.2542789578437805f,
+ -0.5359901785850525f, -0.48090484738349915f, -0.38603779673576355f, -0.4991581439971924f};
+
+ // For the CUDA EP: because the cuDNN frontend uses TF32 for FP32 operations, we get a higher error than with
+ // plain FP32, as TF32 has only a 10-bit mantissa (IEEE FP32 has a 23-bit mantissa).
+ float epsilon = 2.1e-4f;
+
+ TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, false, epsilon);
 }
-TEST(ConvFp16Test, Conv2D_group) {
+TEST(ConvTest, Conv2D_group) {
 ConvOpAndTestAttributes attrs = {
 "", // auto_pad
 vector{1, 1}, // dilations
@@ -717,28 +660,20 @@ TEST(ConvFp16Test, Conv2D_group) {
 {} // excluded EPs
 };
- vector X = {
- MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f),
- MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f),
- MLFloat16(8.0f), MLFloat16(9.0f), MLFloat16(10.0f), MLFloat16(11.0f),
- MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f), MLFloat16(15.0f),
- MLFloat16(16.0f), MLFloat16(17.0f)};
+ vector X = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f};
 vector X_shape = {1, 2, 3, 3};
- vector W = {MLFloat16(1.0f), MLFloat16(2.0f)};
+ vector W = {1.0f, 2.0f};
 vector W_shape = {2, 1, 1, 1};
 vector Y_shape = {1, 2, 3, 3};
- auto expected_vals = {
- MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f),
- MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f),
- MLFloat16(8.0f), MLFloat16(18.0f), MLFloat16(20.0f), MLFloat16(22.0f),
- MLFloat16(24.0f), MLFloat16(26.0f), MLFloat16(28.0f), MLFloat16(30.0f),
- MLFloat16(32.0f), MLFloat16(34.0f)};
-
- TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape);
- TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
+ auto expected_vals = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 18.0f, 20.0f, 22.0f, 24.0f, 26.0f, 28.0f, 30.0f, 32.0f, 34.0f};
+
+ TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape);
+
+ // NNAPI/CoreML EP requires weight to be an initializer
+ TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true);
 }
-TEST(ConvFp16Test, Depthwise2D_Bias_Group1_Issue18992) {
+TEST(ConvTest, Depthwise2D_Bias_Group1_Issue18992) {
 ConvOpAndTestAttributes attrs = {
 "", // auto_pad
 vector{1, 1}, // dilations
@@ -749,20 +684,20 @@ TEST(ConvFp16Test, Depthwise2D_Bias_Group1_Issue18992) {
 {} // excluded EPs
 };
- vector X = {MLFloat16(1.0f)};
+ vector X = {1.0f};
 vector X_shape = {1, 1, 1, 1};
- vector W = {MLFloat16(0.5f)};
+ vector W = {0.5f};
 vector W_shape = {1, 1, 1, 1};
- vector B = {MLFloat16(0.5f)};
+ vector B = {0.5f};
 vector B_shape = {1};
 vector Y_shape = {1, 1, 1, 1};
- auto expected_vals = {MLFloat16(1.0f)};
+ auto expected_vals = {1.0f};
- TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
- TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, 
Y_shape); + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Depthwise2D_Bias_Group2) { +TEST(ConvTest, Depthwise2D_Bias_Group2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -773,34 +708,34 @@ TEST(ConvFp16Test, Depthwise2D_Bias_Group2) { {} // excluded EPs }; - vector X = { - MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), - MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), - MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f), + vector X = { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, - MLFloat16(9.0f), MLFloat16(10.0f), MLFloat16(11.0f), - MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f), - MLFloat16(15.0f), MLFloat16(16.0f), MLFloat16(17.0f)}; + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f}; vector X_shape = {1, 2, 3, 3}; - vector W = {MLFloat16(1.0f), MLFloat16(2.0f)}; + vector W = {1.0f, 2.0f}; vector W_shape = {2, 1, 1, 1}; - vector B = {MLFloat16(1.0f), MLFloat16(-1.0f)}; + vector B = {1.0f, -1.0f}; vector B_shape = {2}; vector Y_shape = {1, 2, 3, 3}; auto expected_vals = { - MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), - MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), - MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f), + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, - MLFloat16(17.0f), MLFloat16(19.0f), MLFloat16(21.0f), - MLFloat16(23.0f), MLFloat16(25.0f), MLFloat16(27.0f), - MLFloat16(29.0f), MLFloat16(31.0f), MLFloat16(33.0f)}; + 17.0f, 19.0f, 21.0f, + 23.0f, 25.0f, 27.0f, + 29.0f, 31.0f, 33.0f}; - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvFp16Test, Depthwise2D_Bias_Group15) { +TEST(ConvTest, Depthwise2D_Bias_Group15) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -811,169 +746,169 @@ TEST(ConvFp16Test, Depthwise2D_Bias_Group15) { {} // excluded EPs }; - vector X = { + vector X = { // C = 0 - MLFloat16(0.0f), MLFloat16(1.0f), - MLFloat16(2.0f), MLFloat16(3.0f), + 0.0f, 1.0f, + 2.0f, 3.0f, // C = 1 - MLFloat16(4.0f), MLFloat16(5.0f), - MLFloat16(6.0f), MLFloat16(7.0f), + 4.0f, 5.0f, + 6.0f, 7.0f, // C = 2 - MLFloat16(8.0f), MLFloat16(9.0f), - MLFloat16(10.0f), MLFloat16(11.0f), + 8.0f, 9.0f, + 10.0f, 11.0f, // C = 3 - MLFloat16(12.0f), MLFloat16(13.0f), - MLFloat16(14.0f), MLFloat16(15.0f), + 12.0f, 13.0f, + 14.0f, 15.0f, // C = 4 - MLFloat16(16.0f), MLFloat16(17.0f), - MLFloat16(18.0f), MLFloat16(19.0f), + 16.0f, 17.0f, + 18.0f, 19.0f, // C = 5 - MLFloat16(20.0f), MLFloat16(21.0f), - MLFloat16(22.0f), MLFloat16(23.0f), + 20.0f, 21.0f, + 22.0f, 23.0f, // C = 6 - MLFloat16(24.0f), MLFloat16(25.0f), - MLFloat16(26.0f), MLFloat16(27.0f), + 24.0f, 25.0f, + 26.0f, 27.0f, // C = 7 - MLFloat16(28.0f), MLFloat16(29.0f), - MLFloat16(30.0f), MLFloat16(31.0f), + 28.0f, 29.0f, + 30.0f, 31.0f, // C = 8 - MLFloat16(32.0f), MLFloat16(33.0f), - MLFloat16(34.0f), MLFloat16(35.0f), + 32.0f, 33.0f, + 34.0f, 35.0f, // C = 9 - MLFloat16(36.0f), MLFloat16(37.0f), - MLFloat16(38.0f), MLFloat16(39.0f), + 36.0f, 37.0f, + 38.0f, 39.0f, // C = 10 - MLFloat16(40.0f), MLFloat16(41.0f), - MLFloat16(42.0f), MLFloat16(43.0f), + 40.0f, 41.0f, + 42.0f, 43.0f, // C = 11 - MLFloat16(44.0f), 
MLFloat16(45.0f), - MLFloat16(46.0f), MLFloat16(47.0f), + 44.0f, 45.0f, + 46.0f, 47.0f, // C = 12 - MLFloat16(48.0f), MLFloat16(49.0f), - MLFloat16(50.0f), MLFloat16(51.0f), + 48.0f, 49.0f, + 50.0f, 51.0f, // C = 13 - MLFloat16(52.0f), MLFloat16(53.0f), - MLFloat16(54.0f), MLFloat16(55.0f), + 52.0f, 53.0f, + 54.0f, 55.0f, // C = 14 - MLFloat16(56.0f), MLFloat16(57.0f), - MLFloat16(58.0f), MLFloat16(59.0f)}; + 56.0f, 57.0f, + 58.0f, 59.0f}; vector X_shape = {1, 15, 2, 2}; - vector W = { + vector W = { // M = 0 - MLFloat16(0.0f), MLFloat16(1.0f), - MLFloat16(2.0f), MLFloat16(3.0f), + 0.0f, 1.0f, + 2.0f, 3.0f, // M = 1 - MLFloat16(4.0f), MLFloat16(5.0f), - MLFloat16(6.0f), MLFloat16(7.0f), + 4.0f, 5.0f, + 6.0f, 7.0f, // M = 2 - MLFloat16(8.0f), MLFloat16(9.0f), - MLFloat16(10.0f), MLFloat16(11.0f), + 8.0f, 9.0f, + 10.0f, 11.0f, // M = 3 - MLFloat16(12.0f), MLFloat16(13.0f), - MLFloat16(14.0f), MLFloat16(15.0f), + 12.0f, 13.0f, + 14.0f, 15.0f, // M = 4 - MLFloat16(16.0f), MLFloat16(17.0f), - MLFloat16(18.0f), MLFloat16(19.0f), + 16.0f, 17.0f, + 18.0f, 19.0f, // M = 5 - MLFloat16(20.0f), MLFloat16(21.0f), - MLFloat16(22.0f), MLFloat16(23.0f), + 20.0f, 21.0f, + 22.0f, 23.0f, // M = 6 - MLFloat16(24.0f), MLFloat16(25.0f), - MLFloat16(26.0f), MLFloat16(27.0f), + 24.0f, 25.0f, + 26.0f, 27.0f, // M = 7 - MLFloat16(28.0f), MLFloat16(29.0f), - MLFloat16(30.0f), MLFloat16(31.0f), + 28.0f, 29.0f, + 30.0f, 31.0f, // M = 8 - MLFloat16(32.0f), MLFloat16(33.0f), - MLFloat16(34.0f), MLFloat16(35.0f), + 32.0f, 33.0f, + 34.0f, 35.0f, // M = 9 - MLFloat16(36.0f), MLFloat16(37.0f), - MLFloat16(38.0f), MLFloat16(39.0f), + 36.0f, 37.0f, + 38.0f, 39.0f, // M = 10 - MLFloat16(40.0f), MLFloat16(41.0f), - MLFloat16(42.0f), MLFloat16(43.0f), + 40.0f, 41.0f, + 42.0f, 43.0f, // M = 11 - MLFloat16(44.0f), MLFloat16(45.0f), - MLFloat16(46.0f), MLFloat16(47.0f), + 44.0f, 45.0f, + 46.0f, 47.0f, // M = 12 - MLFloat16(48.0f), MLFloat16(49.0f), - MLFloat16(50.0f), MLFloat16(51.0f), + 48.0f, 49.0f, + 50.0f, 51.0f, // M = 13 - MLFloat16(52.0f), MLFloat16(53.0f), - MLFloat16(54.0f), MLFloat16(55.0f), + 52.0f, 53.0f, + 54.0f, 55.0f, // M = 14 - MLFloat16(56.0f), MLFloat16(57.0f), - MLFloat16(58.0f), MLFloat16(59.0f)}; + 56.0f, 57.0f, + 58.0f, 59.0f}; vector W_shape = {15, 1, 2, 2}; - vector B = { - MLFloat16(101.0f), - MLFloat16(102.0f), - MLFloat16(103.0f), - MLFloat16(104.0f), - MLFloat16(105.0f), - MLFloat16(106.0f), - MLFloat16(107.0f), - MLFloat16(108.0f), - MLFloat16(109.0f), - MLFloat16(110.0f), - MLFloat16(111.0f), - MLFloat16(112.0f), - MLFloat16(113.0f), - MLFloat16(114.0f), - MLFloat16(115.0f)}; + vector B = { + 101.0f, + 102.0f, + 103.0f, + 104.0f, + 105.0f, + 106.0f, + 107.0f, + 108.0f, + 109.0f, + 110.0f, + 111.0f, + 112.0f, + 113.0f, + 114.0f, + 115.0f}; vector B_shape = {15}; vector Y_shape = {1, 15, 1, 1}; auto expected_vals = { - MLFloat16(115.0f), // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 101.0 - MLFloat16(228.0f), - MLFloat16(469.0f), - MLFloat16(838.0f), - MLFloat16(1335.0f), - MLFloat16(1960.0f), - MLFloat16(2713.0f), // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 107.0 - MLFloat16(3594.0f), - MLFloat16(4603.0f), - MLFloat16(5740.0f), - MLFloat16(7005.0f), - MLFloat16(8398.0f), - MLFloat16(9919.0f), // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 113.0 - MLFloat16(11568.0f), // 52.0*52.0 + 53.0*53.0 + 54.0*54.0 + 55.0*55.0 + 114.0 - MLFloat16(13345.0f) // 56.0*56.0 + 57.0*57.0 + 58.0*58.0 + 59.0*59.0 + 115.0 + 115.0f, // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 101.0 + 228.0f, + 469.0f, + 838.0f, + 
1335.0f,
+ 1960.0f,
+ 2713.0f, // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 107.0
+ 3594.0f,
+ 4603.0f,
+ 5740.0f,
+ 7005.0f,
+ 8398.0f,
+ 9919.0f, // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 113.0
+ 11568.0f, // 52.0*52.0 + 53.0*53.0 + 54.0*54.0 + 55.0*55.0 + 114.0
+ 13345.0f // 56.0*56.0 + 57.0*57.0 + 58.0*58.0 + 59.0*59.0 + 115.0
 };
- TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
- TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true);
+ TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
+ TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true);
 }
-TEST(ConvFp16Test, ConvDimWithZero) {
+TEST(ConvTest, ConvDimWithZero) {
 ConvOpAndTestAttributes attrs = {
 "", // auto_pad
 vector{1, 1}, // dilations
@@ -984,16 +919,20 @@ TEST(ConvFp16Test, ConvDimWithZero) {
 {} // excluded EPs
 };
- vector X;
+ vector X = vector();
 vector X_shape = {0, 2, 4, 4}; // N of 0 should be handled
- vector W = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(1.0f), MLFloat16(2.0f)};
+ vector W = {1.0f, 2.0f, 1.0f, 2.0f};
 vector W_shape = {2, 2, 1, 1};
 vector out_shape = {0, 2, 4, 4};
- TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, {}, out_shape);
+ // not handled by ACL
+ attrs.excluded_providers.insert(kAclExecutionProvider);
+
+ TestConvOp(attrs, {X, W}, {X_shape, W_shape}, {}, out_shape, false, optional(),
+ OpTester::ExpectResult::kExpectSuccess, "", 10);
 }
-TEST(ConvFp16Test, Conv1D_asymmetric_padding) {
+TEST(ConvTest, Conv1D_asymmetric_padding) {
 ConvOpAndTestAttributes attrs = {
 "", // auto_pad
 vector{1}, // dilations
@@ -1004,20 +943,21 @@ TEST(ConvFp16Test, Conv1D_asymmetric_padding) {
 {} // excluded EPs
 };
- vector X = {MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f)};
+ vector X = {1.f, 2.f, 3.f};
 vector X_shape = {1, 1, 3};
- vector W = {MLFloat16(1.f), MLFloat16(1.f), MLFloat16(1.f)};
+ vector W = {1.f, 1.f, 1.f};
 vector W_shape = {1, 1, 3};
- vector B = {MLFloat16()};
+ vector B = {0.f};
 vector B_shape = {1};
 vector Y_shape = {1, 1, 2};
- auto expected_vals = {MLFloat16(3.f), MLFloat16(6.f)};
+ auto expected_vals = {3.f, 6.f};
+
+ TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
- TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
- TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true);
+ TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true);
 }
-TEST(ConvFp16Test, Conv_AutoPad_with_non_default_strides) {
+TEST(ConvTest, Conv_AutoPad_with_non_default_strides) {
 ConvOpAndTestAttributes attrs = {
 "SAME_LOWER", // auto_pad
 vector{1, 1}, // dilations
@@ -1028,318 +968,29 @@ TEST(ConvFp16Test, Conv_AutoPad_with_non_default_strides) {
 {} // excluded EPs
 };
- vector X = {
- MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), MLFloat16(4.0f),
- MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f),
- MLFloat16(10.0f), MLFloat16(11.0f), MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f),
- MLFloat16(15.0f), MLFloat16(16.0f), MLFloat16(17.0f), MLFloat16(18.0f), MLFloat16(19.0f),
- MLFloat16(20.0f), MLFloat16(21.0f), MLFloat16(22.0f), MLFloat16(23.0f), MLFloat16(24.0f)};
+ vector X = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f,
+ 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+ 10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
+ 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f};
 vector X_shape = {1, 
1, 5, 5}; - vector W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)}; + vector W = {1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f}; vector W_shape = {1, 1, 3, 3}; - auto expected_vals = {MLFloat16(12.0f), MLFloat16(27.0f), MLFloat16(24.0f), - MLFloat16(63.0f), MLFloat16(108.0f), MLFloat16(81.0f), - MLFloat16(72.0f), MLFloat16(117.0f), MLFloat16(84.0f)}; + auto expected_vals = {12.0f, 27.0f, 24.0f, + 63.0f, 108.0f, 81.0f, + 72.0f, 117.0f, 84.0f}; vector Y_shape = {1, 1, 3, 3}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); -} - -TEST(ConvFp16Test, Pointwise_2D) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1}, // dilations - 1, // group - vector{1, 1}, // kernel_shape - vector{0, 0, 0, 0}, // pads - vector{1, 1}, // strides - {} // excluded EPs - }; - vector X = { - MLFloat16(-9.f), MLFloat16(1.f), MLFloat16(2.f), - MLFloat16(-5.f), MLFloat16(3.f), MLFloat16(-2.f), - MLFloat16(5.f), MLFloat16(-3.f), MLFloat16(1.f), - MLFloat16(1.f), MLFloat16(8.f), MLFloat16(-4.f), - MLFloat16(-1.f), MLFloat16(6.f), MLFloat16(7.f), - MLFloat16(-1.f), MLFloat16(4.f), MLFloat16(-5.f), - MLFloat16(-9.f), MLFloat16(1.f), MLFloat16(2.f), - MLFloat16(-5.f), MLFloat16(3.f), MLFloat16(-2.f), - MLFloat16(5.f), MLFloat16(-3.f), MLFloat16(1.f)}; - vector X_shape = {1, 3, 3, 3}; - vector W = {MLFloat16(2.f), MLFloat16(-3.f), MLFloat16(0.5f), - MLFloat16(0.25f), MLFloat16(-2.f), MLFloat16(-0.75f)}; - vector W_shape = {2, 3, 1, 1}; - vector Y_shape = {1, 2, 3, 3}; - auto expected_vals = { - MLFloat16(-25.5f), MLFloat16(-21.5f), MLFloat16(17.f), - MLFloat16(-9.5f), MLFloat16(-10.5f), MLFloat16(-26.f), - MLFloat16(15.5f), MLFloat16(-19.5f), MLFloat16(17.5f), - MLFloat16(2.5f), MLFloat16(-16.5f), MLFloat16(7.f), - MLFloat16(4.5f), MLFloat16(-13.5f), MLFloat16(-13.f), - MLFloat16(-0.5f), MLFloat16(-6.5f), MLFloat16(9.5f)}; - - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); -} - -TEST(ConvFp16Test, Pointwise_3D) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1, 1}, // dilations - 1, // group - vector{1, 1, 1}, // kernel_shape - vector{0, 0, 0, 0, 0, 0}, // pads - vector{1, 1, 1}, // strides - {} // excluded EPs - }; - - vector X = { - MLFloat16(2 / 16.f), MLFloat16(3 / 16.f), MLFloat16(4 / 16.f), - MLFloat16(5 / 16.f), MLFloat16(6 / 16.f), MLFloat16(7 / 16.f), - MLFloat16(8 / 16.f), MLFloat16(9 / 16.f), MLFloat16(10 / 16.f), - MLFloat16(11 / 16.f), MLFloat16(12 / 16.f), MLFloat16(13 / 16.f), - MLFloat16(14 / 16.f), MLFloat16(15 / 16.f), MLFloat16(16 / 16.f), - MLFloat16(17 / 16.f), MLFloat16(18 / 16.f), MLFloat16(19 / 16.f), - MLFloat16(20 / 16.f), MLFloat16(21 / 16.f), MLFloat16(22 / 16.f), - MLFloat16(23 / 16.f), MLFloat16(24 / 16.f), MLFloat16(25 / 16.f), - MLFloat16(26 / 16.f), MLFloat16(27 / 16.f), MLFloat16(28 / 16.f)}; - vector X_shape = {1, 1, 3, 3, 3}; - - vector W = {MLFloat16(0.5f)}; - vector W_shape = {1, 1, 1, 1, 1}; - - auto expected_vals = { - MLFloat16(0.0625f), MLFloat16(0.09375f), MLFloat16(0.125f), - MLFloat16(0.15625f), MLFloat16(0.1875f), MLFloat16(0.21875f), - MLFloat16(0.25f), MLFloat16(0.28125f), MLFloat16(0.3125f), - MLFloat16(0.34375f), MLFloat16(0.375f), MLFloat16(0.40625f), - MLFloat16(0.4375f), MLFloat16(0.46875f), 
MLFloat16(0.5f), - MLFloat16(0.53125f), MLFloat16(0.5625f), MLFloat16(0.59375f), - MLFloat16(0.625f), MLFloat16(0.65625f), MLFloat16(0.6875f), - MLFloat16(0.71875f), MLFloat16(0.75f), MLFloat16(0.78125f), - MLFloat16(0.8125f), MLFloat16(0.84375f), MLFloat16(0.875f)}; - vector Y_shape = {1, 1, 3, 3, 3}; - // Test with weight as initializer - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); -} - -#ifndef DISABLE_CONTRIB_OPS - -TEST(ConvFp16Test, Pointwise_Relu) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1}, // dilations - 1, // group - vector{1, 1}, // kernel_shape - vector{0, 0, 0, 0}, // pads - vector{1, 1}, // strides - {kXnnpackExecutionProvider}, // excluded EPs - "Relu" // activation - }; - - vector X = { - MLFloat16(-9.f), MLFloat16(1.f), MLFloat16(-9.f), - MLFloat16(1.f), MLFloat16(8.f), MLFloat16(1.f), - MLFloat16(2.f), MLFloat16(-4.f), MLFloat16(2.f), - MLFloat16(-5.f), MLFloat16(-1.f), MLFloat16(-5.f), - MLFloat16(3.f), MLFloat16(6.f), MLFloat16(3.f), - MLFloat16(-2.f), MLFloat16(7.f), MLFloat16(-2.f), - MLFloat16(5.f), MLFloat16(-1.f), MLFloat16(5.f), - MLFloat16(-3.f), MLFloat16(4.f), MLFloat16(-3.f), - MLFloat16(1.f), MLFloat16(-5.f), MLFloat16(1.f)}; - vector X_shape = {1, 3, 3, 3}; - vector W = {MLFloat16(2.f), MLFloat16(-3.f), MLFloat16(0.5f), - MLFloat16(0.25f), MLFloat16(-2.f), MLFloat16(-0.75f)}; - vector W_shape = {2, 3, 1, 1}; - vector Y_shape = {1, 3, 3, 2}; - auto expected_vals = { - MLFloat16(0.f), MLFloat16(2.5f), - MLFloat16(0.f), MLFloat16(0.f), - MLFloat16(17.f), MLFloat16(7.f), - MLFloat16(0.f), MLFloat16(4.5f), - MLFloat16(0.f), MLFloat16(0.f), - MLFloat16(0.f), MLFloat16(0.f), - MLFloat16(15.5f), MLFloat16(0.f), - MLFloat16(0.f), MLFloat16(0.f), - MLFloat16(17.5f), MLFloat16(9.5f)}; - - auto run_test = [&](const ConvOpAndTestAttributes& test_attrs) { - TestConvFp16Op(test_attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(test_attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); - }; - run_test(attrs); - attrs.domain = kMSInternalNHWCDomain; - attrs.excluded_providers = {kCpuExecutionProvider}; - run_test(attrs); -} - -TEST(ConvFp16Test, Conv2D_HardSigmoid) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1}, // dilations - 1, // group - vector{2, 2}, // kernel_shape - vector{0, 0, 0, 0}, // pads - vector{1, 1}, // strides - {}, // excluded EPs - "HardSigmoid", // activation - vector{0.2f, 0.5f} // activation_parameters - }; - - vector X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), - MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), - MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)}; - vector X_shape = {1, 3, 3, 1}; - vector W = {MLFloat16(0.125f), MLFloat16(0.125f), MLFloat16(0.125f), MLFloat16(0.125f), - MLFloat16(-0.125f), MLFloat16(-0.125f), MLFloat16(-0.125f), MLFloat16(-0.125f)}; - vector W_shape = {2, 1, 2, 2}; - vector Y_shape = {1, 2, 2, 2}; - auto expected_vals = { - MLFloat16(0.8f), MLFloat16(0.2f), - MLFloat16(0.9f), MLFloat16(0.1f), - MLFloat16(1.0f), MLFloat16(0.0f), - MLFloat16(1.0f), MLFloat16(0.0f)}; - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); -} - -TEST(ConvFp16Test, Conv2D_Bias_Z_Relu) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1}, // dilations - 1, // group - vector{2, 2}, // kernel_shape - vector{0, 
0, 0, 0}, // pads - vector{1, 1}, // strides - {}, // excluded EPs - "Relu" // activation - }; - - vector X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), - MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), - MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)}; - vector X_shape = {1, 3, 3, 1}; - vector W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)}; - vector W_shape = {2, 1, 2, 2}; - vector Y_shape = {1, 2, 2, 2}; - vector B = {MLFloat16(1.0f), MLFloat16(-1.0f)}; - vector B_shape = {2}; - vector Z = {MLFloat16(-1.0f), MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(0.0f), - MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(1.0f)}; - vector Z_shape = {1, 2, 2, 2}; - auto expected_vals = {MLFloat16(12.0f), MLFloat16(11.0f), MLFloat16(17.0f), MLFloat16(15.0f), MLFloat16(25.0f), MLFloat16(23.0f), MLFloat16(29.0f), MLFloat16(28.0f)}; - TestConvFp16Op(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape); - TestConvFp16Op(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape, true); -} - -#endif // CONTRIB_OPS - -#ifndef ENABLE_TRAINING -// Prepacking is disabled in full training build so no need to test the feature in a training build. + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); -const onnxruntime::RunOptions run_options = []() { - onnxruntime::RunOptions options{}; - ORT_THROW_IF_ERROR(options.config_options.AddConfigEntry(kOpTesterRunOptionsConfigTestTunableOp, "true")); - return options; -}(); - -const constexpr auto run_with_tunable_op = &run_options; - -TEST(ConvFp16Test, SharedPrepackedWeights) { - OpTester test("Conv", 11); - - vector X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), - MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), - MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)}; - vector X_shape = {1, 1, 3, 3}; - vector W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), - MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)}; - vector W_shape = {2, 1, 2, 2}; - vector Y_shape = {1, 2, 2, 2}; - vector B = {MLFloat16(1.0f), MLFloat16(-1.0f)}; - vector B_shape = {2}; - auto expected_vals = { - MLFloat16(13.0f), MLFloat16(17.0f), MLFloat16(25.0f), MLFloat16(29.0f), - MLFloat16(11.0f), MLFloat16(15.0f), MLFloat16(23.0f), MLFloat16(27.0f)}; - - test.AddInput("X", X_shape, X); - test.AddInput("W", W_shape, W, true); - test.AddInput("B", B_shape, B, true); - test.AddOutput("Y", Y_shape, expected_vals, /*no sort*/ false, 0.002f, 0.0f); - - OrtValue w; - Tensor::InitOrtValue(DataTypeImpl::GetType(), TensorShape(W_shape), - W.data(), OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator), w); - - SessionOptions so; - // Set up B as a shared initializer to be shared between sessions - ASSERT_EQ(so.AddInitializer("W", &w), Status::OK()); - - // We want all sessions running using this OpTester to be able to share pre-packed weights if applicable - test.EnableSharingOfPrePackedWeightsAcrossSessions(); - - // Pre-packing is limited just to the CPU EP for now and we will only test the CPU EP - // and we want to ensure that it is available in this build - auto cpu_ep = []() -> std::vector> { - std::vector> execution_providers; - execution_providers.push_back(DefaultCpuExecutionProvider()); - return execution_providers; - }; - - size_t number_of_pre_packed_weights_counter_session_1 = 0; - size_t number_of_shared_pre_packed_weights_counter = 0; - - // Session 1 - 
{ - test.Config(so) - .Config(run_with_tunable_op) - .ConfigEps(cpu_ep()) - .RunWithConfig(&number_of_pre_packed_weights_counter_session_1, &number_of_shared_pre_packed_weights_counter); - // Assert that no pre-packed weights have been shared thus far - ASSERT_EQ(number_of_shared_pre_packed_weights_counter, static_cast(0)); - } - - auto number_of_elements_in_shared_prepacked_buffers_container = - test.GetNumPrePackedWeightsShared(); - // Assert that the number of elements in the shared container - // is the same as the number of weights that have been pre-packed - ASSERT_EQ(number_of_pre_packed_weights_counter_session_1, number_of_elements_in_shared_prepacked_buffers_container); - - // On some platforms/architectures MLAS may choose to not do any pre-packing and the number of elements - // that have been pre-packed will be zero in which case we do not continue with the testing - // of "sharing" of pre-packed weights as there are no pre-packed weights to be shared at all. - if (number_of_pre_packed_weights_counter_session_1 == 0) - return; - - // Session 2 - { - size_t number_of_pre_packed_weights_counter_session_2 = 0; - test.Config(so) - .Config(run_with_tunable_op) - .ConfigEps(cpu_ep()) - .RunWithConfig(&number_of_pre_packed_weights_counter_session_2, &number_of_shared_pre_packed_weights_counter); - - // Assert that the same number of weights were pre-packed in both sessions - ASSERT_EQ(number_of_pre_packed_weights_counter_session_1, number_of_pre_packed_weights_counter_session_2); - - // Assert that the number of pre-packed weights that were shared equals - // the number of pre-packed weights in the second session - ASSERT_EQ(number_of_pre_packed_weights_counter_session_2, - static_cast(number_of_shared_pre_packed_weights_counter)); - } + // Test with weight as initializer + TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -#endif - } // namespace test -} // namespace onnxruntime - -#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED +} // namespace onnxruntime \ No newline at end of file From 0baa34b6fcae272b6ed7d98dfc82530f0b351c98 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 23 Dec 2024 16:24:04 +0800 Subject: [PATCH 13/17] fix --- .../test/providers/cpu/nn/conv_fp16_test.cc | 1428 ++++++++++------- 1 file changed, 879 insertions(+), 549 deletions(-) diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc index 3569b4a6cf299..3b0d8bd9777f4 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc @@ -1,8 +1,14 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/graph/constants.h" + +#include "core/mlas/inc/mlas.h" + +#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) || defined(COREML_ENABLE_MLPROGRAM) || defined(USE_XNNPACK) + #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" +#include "test/providers/run_options_config_keys.h" +#include "default_providers.h" using namespace std; namespace onnxruntime { @@ -18,71 +24,130 @@ struct ConvOpAndTestAttributes { vector pads; vector strides; std::unordered_set excluded_providers; + string activation = ""; + vector activation_parameters = {}; }; -void TestConvOp(const ConvOpAndTestAttributes& attributes, - const vector>& inputs, - const vector>& input_shapes, - const std::initializer_list& expected_output, - const vector& expected_output_shape, - bool weight_is_initializer = false, - optional epsilon = optional(), - OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess, - const std::string& err_str = "", - int opset = 7, - bool exclude_cuda_nhwc = false) { - OpTester test("Conv", opset); - test.AddAttribute("group", attributes.group); - test.AddAttribute("kernel_shape", attributes.kernel_shape); +/* +Please notice that, we have predefined macros in the head of the file +#if defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) || defined(COREML_ENABLE_MLPROGRAM) +When we have these two macro defines, this UT will turn into green light and work. + +If attributes.activation is set the NhwcFusedConv contrib op is used. +If you are adding support for a new EP to the test and the EP does not support NhwcFusedConv +please add the EP to the excluded_providers list. +*/ +void TestConvFp16Op(const ConvOpAndTestAttributes& attributes, + const vector>& inputs, + const vector>& input_shapes, + const std::initializer_list& expected_output, + const vector& expected_output_shape, + bool weight_is_initializer = false, + OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess, + const std::string& err_str = "", + int opset = 11) { + std::unique_ptr tester; + if (!attributes.activation.empty()) { + tester = std::make_unique("NhwcFusedConv", 1, onnxruntime::kMSDomain); + tester->AddAttribute("activation", attributes.activation); + + if (!attributes.activation_parameters.empty()) { + tester->AddAttribute("activation_params", attributes.activation_parameters); + } + } else { + tester = std::make_unique("Conv", opset); + } + + tester->AddAttribute("group", attributes.group); + tester->AddAttribute("kernel_shape", attributes.kernel_shape); if (!attributes.dilations.empty()) { - test.AddAttribute("dilations", attributes.dilations); + tester->AddAttribute("dilations", attributes.dilations); } // Only one of pads / auto_pad can be present if (!attributes.pads.empty()) { - test.AddAttribute("pads", attributes.pads); + tester->AddAttribute("pads", attributes.pads); } else { - test.AddAttribute("auto_pad", attributes.auto_pad); + tester->AddAttribute("auto_pad", attributes.auto_pad); } if (!attributes.strides.empty()) { - test.AddAttribute("strides", attributes.strides); + tester->AddAttribute("strides", attributes.strides); } - ORT_ENFORCE(inputs.size() <= 3, "Our name array is only setup to handle 3 inputs"); - const char* szNames[] = {"X", "W", "B"}; - test.AddInput(szNames[0], input_shapes[0], inputs[0]); - test.AddInput(szNames[1], input_shapes[1], inputs[1], weight_is_initializer); - if (inputs.size() == 3) - test.AddInput(szNames[2], input_shapes[2], inputs[2]); + ORT_ENFORCE(inputs.size() <= 4, "Our name array is only setup to handle 4 inputs"); + const 
char* szNames[] = {"X", "W", "B", "Z"}; + tester->AddInput(szNames[0], input_shapes[0], inputs[0]); + tester->AddInput(szNames[1], input_shapes[1], inputs[1], weight_is_initializer); + if (inputs.size() >= 3) + tester->AddInput(szNames[2], input_shapes[2], inputs[2]); + if (inputs.size() >= 4) + tester->AddInput(szNames[3], input_shapes[3], inputs[3]); - test.AddOutput("Y", expected_output_shape, expected_output); - - if (epsilon.has_value()) { - test.SetOutputTolerance(*epsilon); - } + tester->AddOutput("Y", expected_output_shape, expected_output, /*no sort*/ false, 0.002f, 0.0f); std::unordered_set excluded_providers(attributes.excluded_providers); // Disable TensorRT because weight as input is not supported excluded_providers.insert(kTensorrtExecutionProvider); - - if (exclude_cuda_nhwc) { -#ifdef ENABLE_CUDA_NHWC_OPS - excluded_providers.insert(kCudaNHWCExecutionProvider); -#endif + // QNN has issue with dynamic weight, auto pad with SAME_UPPER, SAME_LOWER + if (!weight_is_initializer || attributes.auto_pad == "SAME_UPPER" || attributes.auto_pad == "SAME_LOWER") { + excluded_providers.insert(kQnnExecutionProvider); + } + if (!weight_is_initializer || !attributes.activation.empty()) { + excluded_providers.insert(kCoreMLExecutionProvider); } + tester->Run(expect_result, err_str, excluded_providers); +} - // QNN SDK 2.10.0 has a bug that breaks support for dynamic bias inputs. - excluded_providers.insert(kQnnExecutionProvider); +} // namespace + +TEST(ConvFp16Test, Conv1D_Invalid_Input_Shape) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1}, // dilations + 1, // group + vector{2}, // kernel_shape + vector{0, 0}, // pads + vector{1}, // strides + {} // excluded EPs + }; - test.Run(expect_result, err_str, excluded_providers); + vector X = vector(1, MLFloat16(1.0f)); + vector X_shape = {1, 1, 1}; + vector dummy_shape = {1, 1, 2}; + auto dummy_vals = {MLFloat16(0.0f), MLFloat16(0.0f)}; + TestConvFp16Op(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, + OpTester::ExpectResult::kExpectFailure, + "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " + "Both inferred and declared dimension have values but they differ. Inferred=0 Declared=2 Dimension=2", + -1); // use latest opset for shape inferencing errors } -} // namespace +TEST(ConvFp16Test, Conv2D_Invalid_Input_Shape) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{3, 3}, // kernel_shape + vector{0, 0, 0, 0}, // pads + vector{1, 1}, // strides + {} // excluded EPs + }; -// Conv -TEST(ConvTest, Conv1D_1) { + vector X = vector(1 * 3 * 1 * 111, MLFloat16(1.0f)); + vector X_shape = {1, 3, 1, 111}; + vector dummy_shape = {2, 2, 1, 2}; + auto dummy_vals = {MLFloat16(-0.0f), MLFloat16(0.0f), MLFloat16(-0.0f), MLFloat16(-0.0f), + MLFloat16(-0.0f), MLFloat16(0.0f), MLFloat16(-0.0f), MLFloat16(-0.0f)}; + TestConvFp16Op(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, + OpTester::ExpectResult::kExpectFailure, + "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " + "Both inferred and declared dimension have values but they differ. 
Inferred=1 Declared=2 Dimension=0", + -1); // use latest opset for shape inferencing errors +} + +TEST(ConvFp16Test, Conv1D_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1}, // dilations @@ -93,22 +158,21 @@ TEST(ConvTest, Conv1D_1) { {} // excluded EPs }; - vector X = {-0.21559301018714905f, 0.4691687822341919f, 0.4426700472831726f, -0.4517466723918915f, - -0.05216419696807861f, 0.29067182540893555f, 0.251010000705719f}; + vector X = {MLFloat16(-0.215576172f), MLFloat16(0.469238281f), MLFloat16(0.442626953f), + MLFloat16(-0.451660156f), MLFloat16(-0.0521545410f), MLFloat16(0.290771484f), MLFloat16(0.250976562f)}; vector X_shape = {1, 1, 7}; - vector W = {0.24472862482070923f}; + vector W = {MLFloat16(0.244750977f)}; vector W_shape = {1, 1, 1}; vector Y_shape = {1, 1, 7}; - auto expected_vals = {-0.052761781960725784f, 0.11481902748346329f, 0.10833403468132019f, -0.11055534332990646f, - -0.012766072526574135f, 0.07113571465015411f, 0.061429332941770554f}; + auto expected_vals = {MLFloat16(-0.0527624786f), MLFloat16(0.114846528f), MLFloat16(0.108333379f), + MLFloat16(-0.110544264f), MLFloat16(-0.0127648748f), MLFloat16(0.0711666048f), MLFloat16(0.0614267588f)}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - // CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Conv1D_1_DefaultStridesAndDilations) { +TEST(ConvFp16Test, Conv1D_1_DefaultStridesAndDilations) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{}, // dilations @@ -119,23 +183,24 @@ TEST(ConvTest, Conv1D_1_DefaultStridesAndDilations) { {} // excluded EPs }; - vector X = {-0.21559301018714905f, 0.4691687822341919f, 0.4426700472831726f, -0.4517466723918915f, - -0.05216419696807861f, 0.29067182540893555f, 0.251010000705719f}; + vector X = {MLFloat16(-0.215576172f), MLFloat16(0.469238281f), MLFloat16(0.442626953f), + MLFloat16(-0.451660156f), MLFloat16(-0.0521545410f), MLFloat16(0.290771484f), + MLFloat16(0.250976562f)}; vector X_shape = {1, 1, 7}; - vector W = {0.24472862482070923f}; + vector W = {MLFloat16(0.244750977f)}; vector W_shape = {1, 1, 1}; vector Y_shape = {1, 1, 7}; - auto expected_vals = {-0.052761781960725784f, 0.11481902748346329f, 0.10833403468132019f, -0.11055534332990646f, - -0.012766072526574135f, 0.07113571465015411f, 0.061429332941770554f}; + auto expected_vals = {MLFloat16(-0.0527624786f), MLFloat16(0.114846528f), MLFloat16(0.108333379f), + MLFloat16(-0.110544264f), MLFloat16(-0.0127648748f), MLFloat16(0.0711666048f), + MLFloat16(0.0614267588f)}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -// Conv3 -TEST(ConvTest, Conv1D_2) { +TEST(ConvFp16Test, Conv1D_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{2}, // dilations @@ -146,33 +211,32 @@ TEST(ConvTest, Conv1D_2) { {} // excluded EPs }; - vector X = {0.11094123125076294f, -0.0038032233715057373f, 0.3896123170852661f, 0.33259105682373047f, - 0.02794349193572998f, -0.08360505104064941f, -0.4100455045700073f, -0.09502679109573364f, - 
-0.11361867189407349f, -0.025495320558547974f, 0.3696536421775818f, 0.3529144525527954f, - -0.34991076588630676f, -0.22024285793304443f, 0.23085933923721313f, -0.4575521945953369f, - -0.17685726284980774f, -0.06030535697937012f, -0.3996139168739319f, -0.19385704398155212f, - -0.10454908013343811f, -0.14503943920135498f, -0.31941986083984375f, -0.15372398495674133f}; + vector X = {MLFloat16(0.112f), MLFloat16(-0.0038f), MLFloat16(0.382f), MLFloat16(0.332f), + MLFloat16(0.0279f), MLFloat16(-0.0836f), MLFloat16(-0.41f), MLFloat16(-0.095f), + MLFloat16(-0.113f), MLFloat16(-0.0254f), MLFloat16(0.369f), MLFloat16(0.352f), + MLFloat16(-0.349f), MLFloat16(-0.22f), MLFloat16(0.231f), MLFloat16(-0.457f), + MLFloat16(-0.176f), MLFloat16(-0.0603f), MLFloat16(-0.399f), MLFloat16(-0.193f), + MLFloat16(-0.104f), MLFloat16(-0.145f), MLFloat16(-0.319f), MLFloat16(-0.153f)}; vector X_shape = {3, 1, 8}; - vector W = {0.13225573301315308f, 0.09750443696975708f, 0.3469849228858948f, 0.4743430018424988f}; + vector W = {MLFloat16(0.132f), MLFloat16(0.0975f), MLFloat16(0.346f), MLFloat16(0.474f)}; vector W_shape = {2, 1, 2}; vector Y_shape = {3, 2, 5}; - auto expected_vals = {0.010817262344062328f, 0.05266154557466507f, 0.054253075271844864f, -0.03628557175397873f, - -0.05423086881637573f, 0.05262419581413269f, 0.22330480813980103f, 0.14844439923763275f, - -0.1848062425851822f, -0.14227961003780365f, -0.011078324168920517f, 0.02101614698767662f, - 0.014770962297916412f, -0.023767895996570587f, 0.03053247183561325f, -0.053894221782684326f, - 0.13591864705085754f, -0.03771348297595978f, -0.011907249689102173f, 0.08010470867156982f, - -0.01724436692893505f, -0.06235451623797417f, -0.06304522603750229f, -0.044972069561481476f, - -0.042245108634233475f, -0.08389100432395935f, -0.2509208619594574f, -0.18825212121009827f, - -0.18779152631759644f, -0.11083387583494186f}; - - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - - // CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = { + MLFloat16(0.0109176636f), MLFloat16(0.0520324707f), MLFloat16(0.0531311035f), MLFloat16(-0.0362854004f), + MLFloat16(-0.0540771484f), MLFloat16(0.0531005859f), MLFloat16(0.219848633f), MLFloat16(0.145385742f), + MLFloat16(-0.184692383f), MLFloat16(-0.141845703f), MLFloat16(-0.0110092163f), MLFloat16(0.0210418701f), + MLFloat16(0.0146484375f), MLFloat16(-0.0235595703f), MLFloat16(0.0304718018f), MLFloat16(-0.0535583496f), + MLFloat16(0.135864258f), MLFloat16(-0.0379028320f), MLFloat16(-0.0112762451f), MLFloat16(0.0798950195f), + MLFloat16(-0.0171508789f), MLFloat16(-0.0621032715f), MLFloat16(-0.0628051758f), MLFloat16(-0.0448303223f), + MLFloat16(-0.0421142578f), MLFloat16(-0.0834350586f), MLFloat16(-0.250000000f), MLFloat16(-0.187377930f), + MLFloat16(-0.187255859f), MLFloat16(-0.110412598f)}; + + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } // Conv1 -TEST(ConvTest, Conv1D_Bias) { +TEST(ConvFp16Test, Conv1D_Bias) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{2}, // dilations @@ -183,41 +247,33 @@ TEST(ConvTest, Conv1D_Bias) { {} // excluded EPs }; - vector X = {0.4582272171974182f, 0.3877705931663513f, -0.05413919687271118f, -0.3013981878757477f, - 0.19299334287643433f, -0.4758569598197937f, 0.4670986533164978f, 0.4078403115272522f, - 0.24010121822357178f, 0.41645896434783936f, 
-0.038333237171173096f, 0.22969317436218262f, - 0.3565492033958435f, 0.12812334299087524f, 0.10096627473831177f, 0.25682520866394043f, - 0.41700226068496704f, 0.34114283323287964f, -0.429997980594635f, 0.3545404076576233f, - 0.40339237451553345f, 0.10174298286437988f, 0.45713120698928833f, 0.08574831485748291f, - 0.38086581230163574f, 0.16378509998321533f, 0.12321442365646362f, -0.19936135411262512f, - 0.26019394397735596f, -0.18406429886817932f, 0.3110783100128174f, 0.15553230047225952f, - -0.14629846811294556f, -0.1779327094554901f, -0.01390346884727478f, -0.09264758229255676f}; + vector X = {MLFloat16(0.458251953f), MLFloat16(0.387695312f), MLFloat16(-0.0541381836f), + MLFloat16(-0.301513672f), MLFloat16(0.192993164f), MLFloat16(-0.475830078f), + MLFloat16(0.467041016f), MLFloat16(0.407958984f), MLFloat16(0.240112305f), + MLFloat16(0.416503906f), MLFloat16(-0.0383300781f), MLFloat16(0.229736328f), + MLFloat16(0.356445312f), MLFloat16(0.128173828f), MLFloat16(0.100952148f), + MLFloat16(0.256835938f), MLFloat16(0.416992188f), MLFloat16(0.341064453f), + MLFloat16(-0.429931641f), MLFloat16(0.354492188f), MLFloat16(0.403320312f), + MLFloat16(0.101745605f), MLFloat16(0.457031250f), MLFloat16(0.0857543945f), + MLFloat16(0.380859375f), MLFloat16(0.163818359f), MLFloat16(0.123229980f), + MLFloat16(-0.199340820f), MLFloat16(0.260253906f), MLFloat16(-0.184082031f), + MLFloat16(0.311035156f), MLFloat16(0.155517578f), MLFloat16(-0.146240234f), + MLFloat16(-0.177978516f), MLFloat16(-0.0139007568f), MLFloat16(-0.0926513672f)}; vector X_shape = {2, 2, 9}; - vector W = {-0.17206084728240967f, 0.3236315846443176f}; + vector W = {MLFloat16(-0.172119141f), MLFloat16(0.323730469f)}; vector W_shape = {1, 2, 1}; - vector B = {0.37892162799835205f}; + vector B = {MLFloat16(0.378906250f)}; vector B_shape = {1}; vector Y_shape = {2, 1, 4}; - auto expected_vals = {0.37892162799835205f, 0.4625728130340576f, 0.4934738576412201f, 0.44801419973373413f, - 0.37892162799835205f, 0.2499445676803589f, 0.31682088971138f, 0.32773756980895996f}; - - // For the CUDA EP: Due to CUDNN Frontend using TF32 for FP32 operations we get a higher error than using FP32 only, - // as TF32 has a 10 bit mantissa. - float epsilon = 1.1e-5f; - - // This case is not supported by cuDNN frontend, and the fallback (legacy code) requires weight to 4D tensor for NHWC. 
- constexpr bool exclude_cuda_nhwc = true; + auto expected_vals = {MLFloat16(0.378906250f), MLFloat16(0.462597132f), MLFloat16(0.493487000f), + MLFloat16(0.447991282f), MLFloat16(0.378906250f), MLFloat16(0.249894142f), + MLFloat16(0.316803873f), MLFloat16(0.327701926f)}; - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, false, epsilon, - OpTester::ExpectResult::kExpectSuccess, "", 10, exclude_cuda_nhwc); - - // CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true, epsilon, - OpTester::ExpectResult::kExpectSuccess, "", 10, exclude_cuda_nhwc); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -// Conv47 -TEST(ConvTest, Conv2D_1) { +TEST(ConvFp16Test, Conv2D_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -228,71 +284,28 @@ TEST(ConvTest, Conv2D_1) { {} // excluded EPs }; - vector X = {-0.09103918075561523f, -0.32513630390167236f}; + vector X = {MLFloat16(-0.0910644531f), MLFloat16(-0.325195312f)}; vector X_shape = {2, 1, 1, 1}; - vector W = {0.4312484860420227f, -0.12559029459953308f, 0.44889551401138306f, -0.3100617825984955f, - 0.13522827625274658f, -0.06791308522224426f, 0.22671669721603394f, -0.17391827702522278f, - -0.31299442052841187f, -0.31545522809028625f, 0.06560015678405762f, 0.2656586766242981f, - 0.41363757848739624f, 0.31231558322906494f, -0.376018226146698f, -0.005708813667297363f, - 0.34922850131988525f, 0.45095211267471313f}; + vector W = {MLFloat16(0.431152344f), MLFloat16(-0.125610352f), MLFloat16(0.448974609f), + MLFloat16(-0.310058594f), MLFloat16(0.135253906f), MLFloat16(-0.0679321289f), + MLFloat16(0.226684570f), MLFloat16(-0.173950195f), MLFloat16(-0.312988281f), + MLFloat16(-0.315429688f), MLFloat16(0.065612793f), MLFloat16(0.265625f), + MLFloat16(0.413574219f), MLFloat16(0.312255859f), MLFloat16(-0.375976562f), + MLFloat16(-0.00571060181f), MLFloat16(0.349121094f), MLFloat16(0.450927734f)}; vector W_shape = {2, 1, 3, 3}; vector Y_shape = {2, 2, 1, 2}; - auto expected_vals = {-0.012311071157455444f, 0.02822777070105076f, -0.028432954102754593f, -0.037657227367162704f, - -0.04396762326359749f, 0.10081233829259872f, -0.10154513269662857f, -0.13448859751224518f}; + auto expected_vals = {MLFloat16(-0.012316823f), MLFloat16(0.0282353163f), + MLFloat16(-0.0284354091f), MLFloat16(-0.0376619101f), + MLFloat16(-0.0439839363f), MLFloat16(0.100829601f), + MLFloat16(-0.101544142f), MLFloat16(-0.134492397f)}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // NNAPI/CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); -} - -TEST(ConvTest, Conv1D_Invalid_Input_Shape) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1}, // dilations - 1, // group - vector{2}, // kernel_shape - vector{0, 0}, // pads - vector{1}, // strides - {} // excluded EPs - }; - - vector X = vector(1, 1.0f); - vector X_shape = {1, 1, 1}; - vector dummy_shape = {1, 1, 2}; - auto dummy_vals = {0.0f, 0.0f}; - TestConvOp(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, optional(), - OpTester::ExpectResult::kExpectFailure, - "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. 
" - "Both inferred and declared dimension have values but they differ. Inferred=0 Declared=2 Dimension=2", - -1); // use latest opset for shape inferencing errors -} - -TEST(ConvTest, Conv2D_Invalid_Input_Shape) { - ConvOpAndTestAttributes attrs = { - "", // auto_pad - vector{1, 1}, // dilations - 1, // group - vector{3, 3}, // kernel_shape - vector{0, 0, 0, 0}, // pads - vector{1, 1}, // strides - {} // excluded EPs - }; - - vector X = vector(1 * 3 * 1 * 111, 1.0f); - vector X_shape = {1, 3, 1, 111}; - vector dummy_shape = {2, 2, 1, 2}; - auto dummy_vals = {-0.0f, 0.0f, -0.0f, -0.0f, - -0.0f, 0.0f, -0.0f, -0.0f}; - TestConvOp(attrs, {X, dummy_vals}, {X_shape, dummy_shape}, dummy_vals, dummy_shape, false, optional(), - OpTester::ExpectResult::kExpectFailure, - "Node:node1 Output:Y [ShapeInferenceError] Can't merge shape info. " - "Both inferred and declared dimension have values but they differ. Inferred=1 Declared=2 Dimension=0", - -1); // use latest opset for shape inferencing errors + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -// Conv30 -TEST(ConvTest, Conv2D_2) { +TEST(ConvFp16Test, Conv2D_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -303,43 +316,54 @@ TEST(ConvTest, Conv2D_2) { {} // excluded EPs }; - vector X = {0.45246148109436035f, 0.15498268604278564f, 0.11199361085891724f, -0.39421093463897705f, - 0.2626858949661255f, 0.13414543867111206f, -0.27184486389160156f, -0.43028733134269714f, - -0.26825493574142456f, 0.3893144130706787f, -0.13631996512413025f, -0.009590476751327515f, - -0.48771554231643677f, -0.25256502628326416f, -0.2812897562980652f, 0.4043201804161072f, - 0.07795023918151855f, 0.326981782913208f, 0.13114392757415771f, -0.4416425824165344f, - 0.12446999549865723f, 0.36739975214004517f, 0.1698915958404541f, 0.2008744478225708f, - 0.23339951038360596f, 0.38613730669021606f, 0.11117297410964966f, 0.3877097964286804f, - 0.20812749862670898f, -0.34297940135002136f, -0.029246658086776733f, -0.20483523607254028f, - -0.19244328141212463f, -0.11104947328567505f, -0.32830488681793213f, -0.01800677180290222f, - 0.3618946671485901f, -0.40949052572250366f, -0.18248388171195984f, -0.3349453806877136f, - -0.34091079235076904f, 0.006497859954833984f, 0.4537564516067505f, 0.08006560802459717f, - -0.14788749814033508f, 0.034442365169525146f, -0.33322954177856445f, 0.06049239635467529f, - 0.42619407176971436f}; + vector X = { + MLFloat16(0.452392578f), MLFloat16(0.155029297f), MLFloat16(0.111999512f), + MLFloat16(-0.394287109f), MLFloat16(0.262695312f), MLFloat16(0.134155273f), + MLFloat16(-0.271728516f), MLFloat16(-0.430175781f), MLFloat16(-0.268310547f), + MLFloat16(0.389404297f), MLFloat16(-0.136352539f), MLFloat16(-0.00959014893f), + MLFloat16(-0.487792969f), MLFloat16(-0.252685547f), MLFloat16(-0.281250000f), + MLFloat16(0.404296875f), MLFloat16(0.0779418945f), MLFloat16(0.326904297f), + MLFloat16(0.131103516f), MLFloat16(-0.441650391f), MLFloat16(0.124450684f), + MLFloat16(0.367431641f), MLFloat16(0.169921875f), MLFloat16(0.200927734f), + MLFloat16(0.233398438f), MLFloat16(0.386230469f), MLFloat16(0.111145020f), + MLFloat16(0.387695312f), MLFloat16(0.208129883f), MLFloat16(-0.343017578f), + MLFloat16(-0.0292510986f), MLFloat16(-0.204833984f), MLFloat16(-0.192382812f), + MLFloat16(-0.111022949f), MLFloat16(-0.328369141f), MLFloat16(-0.0180053711f), + MLFloat16(0.361816406f), MLFloat16(-0.409423828f), MLFloat16(-0.182495117f), + MLFloat16(-0.334960938f), MLFloat16(-0.340820312f), 
MLFloat16(0.00649642944f), + MLFloat16(0.453857422f), MLFloat16(0.0800781250f), MLFloat16(-0.147827148f), + MLFloat16(0.0344543457f), MLFloat16(-0.333251953f), MLFloat16(0.0604858398f), + MLFloat16(0.426269531f)}; vector X_shape = {1, 1, 7, 7}; - vector W = {-0.4406261742115021f}; + vector W = {MLFloat16(-0.440673828f)}; vector W_shape = {1, 1, 1, 1}; vector Y_shape = {1, 1, 7, 7}; - auto expected_vals = {-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f, 0.17369966208934784f, - -0.11574628204107285f, -0.05910799279808998f, 0.1197819635272026f, 0.18959586322307587f, - 0.1182001456618309f, -0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f, - 0.21490024030208588f, 0.11128675937652588f, 0.12394362688064575f, -0.17815405130386353f, - -0.034346915781497955f, -0.14407673478126526f, -0.05778544768691063f, 0.19459928572177887f, - -0.05484473705291748f, -0.16188594698905945f, -0.07485868036746979f, -0.08851054310798645f, - -0.10284193605184555f, -0.17014220356941223f, -0.04898572340607643f, -0.17083507776260376f, - -0.09170642495155334f, 0.1511256992816925f, 0.012886842712759972f, 0.09025576710700989f, - 0.08479554951190948f, 0.0489313043653965f, 0.14465972781181335f, 0.007934254594147205f, - -0.15946026146411896f, 0.1804322451353073f, 0.08040717244148254f, 0.1475857049226761f, - 0.15021422505378723f, -0.0028631272725760937f, -0.19993697106838226f, -0.03527900204062462f, - 0.06516310572624207f, -0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f, - -0.18779225647449493f}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + auto expected_vals = { + MLFloat16(-0.199340820f), MLFloat16(-0.0682983398f), MLFloat16(-0.0493469238f), + MLFloat16(0.173706055f), MLFloat16(-0.115783691f), MLFloat16(-0.0591125488f), + MLFloat16(0.119750977f), MLFloat16(0.189575195f), MLFloat16(0.118225098f), + MLFloat16(-0.171630859f), MLFloat16(0.0600891113f), MLFloat16(0.00422668457f), + MLFloat16(0.214965820f), MLFloat16(0.111328125f), MLFloat16(0.123962402f), + MLFloat16(-0.178222656f), MLFloat16(-0.0343322754f), MLFloat16(-0.144042969f), + MLFloat16(-0.0577697754f), MLFloat16(0.194580078f), MLFloat16(-0.0548400879f), + MLFloat16(-0.161865234f), MLFloat16(-0.0748901367f), MLFloat16(-0.0885620117f), + MLFloat16(-0.102844238f), MLFloat16(-0.170166016f), MLFloat16(-0.0489807129f), + MLFloat16(-0.170898438f), MLFloat16(-0.0917358398f), MLFloat16(0.151123047f), + MLFloat16(0.0128936768f), MLFloat16(0.0902709961f), MLFloat16(0.0847778320f), + MLFloat16(0.0489196777f), MLFloat16(0.144653320f), MLFloat16(0.00793457031f), + MLFloat16(-0.159423828f), MLFloat16(0.180419922f), MLFloat16(0.0804443359f), + MLFloat16(0.147583008f), MLFloat16(0.150146484f), MLFloat16(-0.00286293030f), + MLFloat16(-0.199951172f), MLFloat16(-0.0352783203f), MLFloat16(0.0651245117f), + MLFloat16(-0.0151824951f), MLFloat16(0.146850586f), MLFloat16(-0.0266571045f), + MLFloat16(-0.187866211f)}; + + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // NNAPI/CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Conv2D_Bias_1) { +TEST(ConvFp16Test, Conv2D_Bias_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -350,23 +374,21 @@ TEST(ConvTest, Conv2D_Bias_1) { {} // excluded EPs }; - vector X = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; + vector X = 
{MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)}; vector X_shape = {1, 1, 3, 3}; - vector W = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + vector W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)}; vector W_shape = {2, 1, 2, 2}; vector Y_shape = {1, 2, 2, 2}; - vector B = {1.0f, -1.0f}; + vector B = {MLFloat16(1.0f), MLFloat16(-1.0f)}; vector B_shape = {2}; - auto expected_vals = {13.0f, 17.0f, 25.0f, 29.0f, 11.0f, 15.0f, 23.0f, 27.0f}; - - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + auto expected_vals = {MLFloat16(13.0f), MLFloat16(17.0f), MLFloat16(25.0f), MLFloat16(29.0f), MLFloat16(11.0f), MLFloat16(15.0f), MLFloat16(23.0f), MLFloat16(27.0f)}; - // NNAPI/CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } // Conv48 -TEST(ConvTest, Conv2D_Bias_2) { +TEST(ConvFp16Test, Conv2D_Bias_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -377,46 +399,55 @@ TEST(ConvTest, Conv2D_Bias_2) { {} // excluded EPs }; - vector X = {-0.22904816269874573f, -0.20278319716453552f, -0.4723144471645355f, 0.027880489826202393f, - 0.2685856819152832f, -0.19361668825149536f, -0.39857280254364014f, 0.40285515785217285f, - 0.20966708660125732f, -0.39234158396720886f, -0.07502302527427673f, 0.4662899374961853f, - -0.2567148208618164f, -0.1186269223690033f, -0.1897754967212677f, -0.3967694342136383f, - -0.4268943667411804f, -0.344584584236145f, -0.4483465552330017f, -0.41608482599258423f, - -0.23649904131889343f, -0.4195239543914795f, 0.3277903199195862f, -0.11628741025924683f, - 0.2873995900154114f, 0.21717703342437744f, -0.26514798402786255f, 0.08272713422775269f, - 0.0050997138023376465f, -0.41409194469451904f, 0.2826550006866455f, 0.4891064763069153f, - -0.1522480845451355f, -0.2554396986961365f, 0.04099029302597046f, -0.35793858766555786f, - 0.2557554841041565f, 0.41162675619125366f, -0.06953108310699463f, 0.029517710208892822f, - 0.32956594228744507f, 0.4615175127983093f, -0.3216847777366638f, 0.15545696020126343f, - -0.3779126703739166f, -0.01712372899055481f, 0.07461833953857422f, 0.38875824213027954f, - 0.1980893611907959f, -0.19913813471794128f, -0.011296629905700684f, 0.30053526163101196f, - 0.4461088180541992f, 0.025034189224243164f, -0.3370230793952942f, -0.21012544631958008f, - -0.41627752780914307f, -0.43801137804985046f, 0.13566172122955322f, -0.47898364067077637f, - -0.45526939630508423f, -0.3007912039756775f, 0.06994932889938354f, -0.0749855637550354f, - -0.22754916548728943f, -0.469131737947464f, 0.08644282817840576f, 0.06157493591308594f, - -0.3920745849609375f, 0.458797812461853f, 0.18890488147735596f, 0.40145808458328247f}; + vector X = { + MLFloat16(-0.625f), MLFloat16(0.4375f), MLFloat16(0.0625f), + MLFloat16(-0.3125f), MLFloat16(-0.6875f), MLFloat16(0.375f), + MLFloat16(0.0625f), MLFloat16(-0.375f), MLFloat16(0.6875f), + MLFloat16(0.3125f), MLFloat16(-0.0625f), MLFloat16(-0.4375f), + MLFloat16(0.625f), MLFloat16(0.25f), MLFloat16(-0.125f), + MLFloat16(-0.5f), MLFloat16(0.5625f), MLFloat16(0.1875f), + MLFloat16(-0.1875f), MLFloat16(-0.5625f), 
MLFloat16(0.5f), + MLFloat16(0.125f), MLFloat16(-0.25f), MLFloat16(-0.625f), + MLFloat16(0.4375f), MLFloat16(0.0625f), MLFloat16(-0.3125f), + MLFloat16(-0.6875f), MLFloat16(0.375f), MLFloat16(0.25f), + MLFloat16(-0.375f), MLFloat16(0.6875f), MLFloat16(0.3125f), + MLFloat16(-0.0625f), MLFloat16(-0.4375f), MLFloat16(0.625f), + MLFloat16(0.25f), MLFloat16(-0.125f), MLFloat16(-0.5f), + MLFloat16(0.5625f), MLFloat16(0.1875f), MLFloat16(-0.1875f), + MLFloat16(-0.5625f), MLFloat16(0.5f), MLFloat16(0.125f), + MLFloat16(-0.25f), MLFloat16(-0.625f), MLFloat16(0.4375f), + MLFloat16(0.0625f), MLFloat16(-0.3125f), MLFloat16(-0.6875f), + MLFloat16(0.375f), MLFloat16(0.125f), MLFloat16(-0.375f), + MLFloat16(0.6875f), MLFloat16(0.3125f), MLFloat16(-0.0625f), + MLFloat16(-0.4375f), MLFloat16(0.625f), MLFloat16(0.25f), + MLFloat16(-0.125f), MLFloat16(-0.5f), MLFloat16(0.5625f), + MLFloat16(0.1875f), MLFloat16(-0.1875f), MLFloat16(-0.5625f), + MLFloat16(0.5f), MLFloat16(0.125f), MLFloat16(-0.25f), + MLFloat16(-0.625f), MLFloat16(0.4375f), MLFloat16(0.0625f)}; vector X_shape = {1, 2, 6, 6}; - vector W = {-0.48007914423942566f, -0.21048793196678162f, 0.2505034804344177f, 0.1610567569732666f, - -0.24951639771461487f, 0.1918455958366394f, 0.44247758388519287f, 0.06943017244338989f, - -0.10510382056236267f, -0.41663575172424316f, -0.3053555488586426f, -0.19126328825950623f, - -0.42332321405410767f, 0.498790979385376f, 0.081226646900177f, -0.21777048707008362f, - 0.46603143215179443f, -0.43488776683807373f, -0.3080252408981323f, -0.3844330906867981f, - -0.17214277386665344f, -0.3650006353855133f, 0.21724021434783936f, 0.1636529564857483f, - -0.22924479842185974f, 0.044009625911712646f, 0.274614155292511f, -0.06811442971229553f, - 0.450619637966156f, 0.4611729383468628f, 0.20782196521759033f, -0.3136714696884155f}; + vector W = { + MLFloat16(-0.3125f), MLFloat16(-0.6875f), MLFloat16(0.375f), MLFloat16(0.025f), + MLFloat16(-0.375f), MLFloat16(0.6875f), MLFloat16(0.3125f), MLFloat16(-0.0625f), + MLFloat16(-0.4375f), MLFloat16(0.625f), MLFloat16(0.25f), MLFloat16(-0.125f), + MLFloat16(-0.5f), MLFloat16(0.5625f), MLFloat16(0.1875f), MLFloat16(-0.1875f), + MLFloat16(-0.5625f), MLFloat16(0.5f), MLFloat16(0.125f), MLFloat16(-0.25f), + MLFloat16(-0.625f), MLFloat16(0.4375f), MLFloat16(0.0625f), MLFloat16(-0.3125f), + MLFloat16(-0.6875f), MLFloat16(0.375f), MLFloat16(-0.125f), MLFloat16(-0.375f), + MLFloat16(0.6875f), MLFloat16(0.3125f), MLFloat16(-0.0625f), MLFloat16(-0.4375f)}; vector W_shape = {1, 2, 4, 4}; - vector B = {-0.40378910303115845f}; + vector B = {MLFloat16(-0.8125f)}; vector B_shape = {1}; vector Y_shape = {1, 1, 4, 2}; - auto expected_vals = {-0.3419531583786011f, -0.6116723418235779f, -0.39677709341049194f, -0.7316848039627075f, - -0.5647197365760803f, 0.02788025140762329f, -0.30450713634490967f, -0.6786775588989258f}; + auto expected_vals = { + MLFloat16(-0.83203125f), MLFloat16(-1.40625f), MLFloat16(-0.595312476f), MLFloat16(-1.93906248f), + MLFloat16(-0.896875024f), MLFloat16(-1.53750002f), MLFloat16(-0.904687524f), MLFloat16(-1.65937495f)}; - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Conv2D_AutoPad1) { +TEST(ConvFp16Test, Conv2D_AutoPad1) { ConvOpAndTestAttributes attrs = { 
"SAME_UPPER", // auto_pad vector{1, 1}, // dilations @@ -427,26 +458,26 @@ TEST(ConvTest, Conv2D_AutoPad1) { {} // excluded EPs }; - vector X = vector(25, 1.0f); + vector X = vector(25, MLFloat16(1.0f)); vector X_shape = {1, 1, 5, 5}; - vector W = {0.0f, 1.0f, 2.0f, - 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f}; + vector W = {MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), + MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), + MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f)}; vector W_shape = {1, 1, 3, 3}; vector Y_shape = {1, 1, 5, 5}; - auto expected_vals = {24.0f, 33.0f, 33.0f, 33.0f, 20.0f, - 27.0f, 36.0f, 36.0f, 36.0f, 21.0f, - 27.0f, 36.0f, 36.0f, 36.0f, 21.0f, - 27.0f, 36.0f, 36.0f, 36.0f, 21.0f, - 12.0f, 15.0f, 15.0f, 15.0f, 8.0f}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + auto expected_vals = {MLFloat16(24.0f), MLFloat16(33.0f), MLFloat16(33.0f), MLFloat16(33.0f), MLFloat16(20.0f), + MLFloat16(27.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(21.0f), + MLFloat16(27.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(21.0f), + MLFloat16(27.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(36.0f), MLFloat16(21.0f), + MLFloat16(12.0f), MLFloat16(15.0f), MLFloat16(15.0f), MLFloat16(15.0f), MLFloat16(8.0f)}; + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); // NNAPI/CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Conv2D_AutoPad2) { +TEST(ConvFp16Test, Conv2D_AutoPad2) { ConvOpAndTestAttributes attrs = { "SAME_LOWER", // auto_pad vector{1, 1}, // dilations @@ -457,31 +488,29 @@ TEST(ConvTest, Conv2D_AutoPad2) { {} // excluded EPs }; - vector X = {1.0f, 0.0f, 1.0f, 0.0f, 1.0f, - 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, - 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, - 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, - 1.0f, 0.0f, 1.0f, 0.0f, 1.0f}; + vector X = {MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), + MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), + MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), + MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), + MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.0f), MLFloat16(1.0f)}; vector X_shape = {1, 1, 5, 5}; - vector W = {0.0f, 1.0f, 2.0f, - 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f}; + vector W = {MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), + MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), + MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f)}; vector W_shape = {1, 1, 3, 3}; vector Y_shape = {1, 1, 5, 5}; - auto expected_vals = {11.0f, 22.0f, 11.0f, 22.0f, 11.0f, - 12.0f, 24.0f, 12.0f, 24.0f, 12.0f, - 12.0f, 24.0f, 12.0f, 24.0f, 12.0f, - 12.0f, 24.0f, 12.0f, 24.0f, 12.0f, - 5.0f, 10.0f, 5.0f, 10.0f, 5.0f}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - - // NNAPI/CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = {MLFloat16(11.0f), MLFloat16(22.0f), MLFloat16(11.0f), MLFloat16(22.0f), MLFloat16(11.0f), + MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), + MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), + MLFloat16(12.0f), MLFloat16(24.0f), 
MLFloat16(12.0f), MLFloat16(24.0f), MLFloat16(12.0f), + MLFloat16(5.0f), MLFloat16(10.0f), MLFloat16(5.0f), MLFloat16(10.0f), MLFloat16(5.0f)}; + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -// Conv10 -TEST(ConvTest, Conv3D_1) { +TEST(ConvFp16Test, Conv3D_1) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1, 1}, // dilations @@ -492,33 +521,35 @@ TEST(ConvTest, Conv3D_1) { {} // excluded EPs }; - vector X = {-0.43337246775627136f, -0.48385289311408997f, -0.30954962968826294f, - 0.16074687242507935f, -0.46670910716056824f, 0.46576786041259766f, - -0.37056273221969604f, 0.40604978799819946f, -0.035478413105010986f, - -0.3125576674938202f, 0.42677170038223267f, 0.39851123094558716f, - -0.3906140625476837f, 0.2590462565422058f, -0.20646807551383972f, - 0.1382436752319336f, -0.20149192214012146f, 0.10030072927474976f, - -0.2413364052772522f, 0.1231224536895752f, 0.032734215259552f, - 0.29610633850097656f, -0.23117440938949585f, 0.3345826268196106f, - 0.02567422389984131f, 0.24579226970672607f, 0.11724984645843506f}; + vector X = { + MLFloat16(-0.433349609f), MLFloat16(-0.483886719f), MLFloat16(-0.309570312f), + MLFloat16(0.160766602f), MLFloat16(-0.466796875f), MLFloat16(0.465820312f), + MLFloat16(-0.370605469f), MLFloat16(0.406005859f), MLFloat16(-0.0354919434f), + MLFloat16(-0.312500000f), MLFloat16(0.426757812f), MLFloat16(0.398437500f), + MLFloat16(-0.390625000f), MLFloat16(0.259033203f), MLFloat16(-0.206420898f), + MLFloat16(0.138183594f), MLFloat16(-0.201538086f), MLFloat16(0.100280762f), + MLFloat16(-0.241333008f), MLFloat16(0.123107910f), MLFloat16(0.0327453613f), + MLFloat16(0.296142578f), MLFloat16(-0.231201172f), MLFloat16(0.334472656f), + MLFloat16(0.0256805420f), MLFloat16(0.245849609f), MLFloat16(0.117248535f)}; vector X_shape = {1, 1, 3, 3, 3}; - vector W = {-0.44214117527008057f}; + vector W = {MLFloat16(-0.442138672f)}; vector W_shape = {1, 1, 1, 1, 1}; vector Y_shape = {1, 1, 3, 3, 3}; - auto expected_vals = {0.19161181151866913f, 0.21393129229545593f, 0.13686463236808777f, - -0.07107280939817429f, 0.20635131001472473f, -0.20593515038490295f, - 0.16384103894233704f, -0.17953133583068848f, 0.01568646728992462f, - 0.13819462060928345f, -0.1886933445930481f, -0.17619822919368744f, - 0.17270655930042267f, -0.11453501880168915f, 0.09128803759813309f, - -0.06112322211265564f, 0.08908787369728088f, -0.04434708133339882f, - 0.10670476406812668f, -0.054437506943941116f, -0.014473143965005875f, - -0.13092079758644104f, 0.10221172869205475f, -0.1479327529668808f, - -0.011351631954312325f, -0.10867488384246826f, -0.05184098333120346f}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + auto expected_vals = { + MLFloat16(0.191600621f), MLFloat16(0.213945031f), MLFloat16(0.136873007f), + MLFloat16(-0.0710811317f), MLFloat16(0.206388950f), MLFloat16(-0.205957174f), + MLFloat16(0.163859010f), MLFloat16(-0.179510891f), MLFloat16(0.0156923607f), + MLFloat16(0.138168335f), MLFloat16(-0.188686132f), MLFloat16(-0.176164627f), + MLFloat16(0.172710419f), MLFloat16(-0.114528596f), MLFloat16(0.0912666619f), + MLFloat16(-0.0610963106f), MLFloat16(0.0891077816f), MLFloat16(-0.0443380028f), + MLFloat16(0.106702656f), MLFloat16(-0.0544307679f), MLFloat16(-0.0144779906f), + MLFloat16(-0.130936086f), MLFloat16(0.102222979f), MLFloat16(-0.147883296f), + MLFloat16(-0.0113543607f), MLFloat16(-0.108699620f), MLFloat16(-0.0518401116f)}; + 
TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -// Conv22 -TEST(ConvTest, Conv3D_2) { +TEST(ConvFp16Test, Conv3D_2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1, 1}, // dilations @@ -529,39 +560,40 @@ TEST(ConvTest, Conv3D_2) { {} // excluded EPs }; - vector X = {0.010772407054901123f, -0.43806642293930054f, 0.455391526222229f, -0.28657248616218567f, - 0.45676887035369873f, -0.0320507287979126f, 0.4229400157928467f, -0.18730869889259338f, - -0.45851585268974304f, 0.042054951190948486f, -0.13332295417785645f, -0.25374430418014526f, - -0.23845627903938293f, 0.12214112281799316f, -0.1778157651424408f, 0.1891845464706421f, - 0.37962496280670166f, -0.033982306718826294f, 0.12737131118774414f, -0.040284961462020874f, - 0.46427029371261597f, -0.22687292098999023f, 0.17398333549499512f, -0.3014046251773834f, - -0.4043419063091278f, -0.33206477761268616f, 0.04655301570892334f, -0.4947906732559204f, - 0.0755157470703125f, 0.1173025369644165f, 0.47043120861053467f, 0.4824737310409546f, - -0.37734976410865784f, -0.056491583585739136f, -0.10790631175041199f, 0.043476223945617676f, - 0.24469023942947388f, -0.4100031852722168f, 0.0616222620010376f, 0.2296960949897766f, - 0.27883386611938477f, 0.08150351047515869f, 0.2453773021697998f, 0.08250969648361206f, - -0.1471814215183258f, -0.43011274933815f, 0.027180075645446777f, 0.3605625033378601f, - 0.24954384565353394f, -0.22505927085876465f, -0.36272895336151123f, -0.47674262523651123f, - 0.11275297403335571f, 0.49773406982421875f, 0.2686365246772766f, 0.025525271892547607f, - -0.3037869930267334f, 0.41126757860183716f, 0.36149072647094727f, 0.00883406400680542f, - -0.07959523797035217f, 0.3601323366165161f, 0.17322391271591187f, -0.012007325887680054f}; + vector X = { + MLFloat16(0.0107727051f), MLFloat16(-0.437988281f), MLFloat16(0.455322266f), MLFloat16(-0.286621094f), + MLFloat16(0.456787109f), MLFloat16(-0.0320434570f), MLFloat16(0.422851562f), MLFloat16(-0.187255859f), + MLFloat16(-0.458496094f), MLFloat16(0.0420532227f), MLFloat16(-0.133300781f), MLFloat16(-0.253662109f), + MLFloat16(-0.238403320f), MLFloat16(0.122131348f), MLFloat16(-0.177856445f), MLFloat16(0.189208984f), + MLFloat16(0.379638672f), MLFloat16(-0.0339965820f), MLFloat16(0.127319336f), MLFloat16(-0.0402832031f), + MLFloat16(0.464355469f), MLFloat16(-0.226928711f), MLFloat16(0.173950195f), MLFloat16(-0.301513672f), + MLFloat16(-0.404296875f), MLFloat16(-0.332031250f), MLFloat16(0.0465393066f), MLFloat16(-0.494873047f), + MLFloat16(0.0755004883f), MLFloat16(0.117309570f), MLFloat16(0.470458984f), MLFloat16(0.482421875f), + MLFloat16(-0.377441406f), MLFloat16(-0.0564880371f), MLFloat16(-0.107910156f), MLFloat16(0.0434875488f), + MLFloat16(0.244750977f), MLFloat16(-0.409912109f), MLFloat16(0.0616149902f), MLFloat16(0.229736328f), + MLFloat16(0.278808594f), MLFloat16(0.0814819336f), MLFloat16(0.245361328f), MLFloat16(0.0825195312f), + MLFloat16(-0.147216797f), MLFloat16(-0.430175781f), MLFloat16(0.0271759033f), MLFloat16(0.360595703f), + MLFloat16(0.249511719f), MLFloat16(-0.225097656f), MLFloat16(-0.362792969f), MLFloat16(-0.476806641f), + MLFloat16(0.112731934f), MLFloat16(0.497802734f), MLFloat16(0.268554688f), MLFloat16(0.0255279541f), + MLFloat16(-0.303710938f), MLFloat16(0.411376953f), MLFloat16(0.361572266f), MLFloat16(0.00883483887f), + MLFloat16(-0.0795898438f), MLFloat16(0.360107422f), MLFloat16(0.173217773f), MLFloat16(-0.0120086670f)}; vector 
X_shape = {1, 1, 4, 4, 4}; - vector W = {0.32824617624282837f}; + vector W = {MLFloat16(0.328125f)}; vector W_shape = {1, 1, 1, 1, 1}; vector Y_shape = {1, 1, 4, 4, 4}; - auto expected_vals = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0035360013134777546f, 0.14948052167892456f, 0.0f, - 0.0f, -0.15050607919692993f, -0.043762750923633575f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -0.12386361509561539f, -0.03541983291506767f, 0.0f, - 0.0f, 0.09152615070343018f, 0.08054415881633759f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + auto expected_vals = {MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), + MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), + MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(0.00353479385f), MLFloat16(0.149402618f), MLFloat16(), + MLFloat16(), MLFloat16(-0.150444031f), MLFloat16(-0.0437393188f), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), + MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(-0.123847961f), MLFloat16(-0.03540802f), MLFloat16(), + MLFloat16(), MLFloat16(0.0914840698f), MLFloat16(0.0805091858f), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), + MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), + MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16(), MLFloat16()}; + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -// Conv23 -TEST(ConvTest, Conv3D_Bias) { +TEST(ConvFp16Test, Conv3D_Bias) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{2, 2, 2}, // dilations @@ -572,84 +604,96 @@ TEST(ConvTest, Conv3D_Bias) { {} // excluded EPs }; - vector X = {0.46796226501464844f, -0.4613912105560303f, 0.33512794971466064f, -0.4010460674762726f, - 0.41722816228866577f, -0.048133403062820435f, 0.20415884256362915f, 0.03189706802368164f, - -0.04779183864593506f, -0.0795503556728363f, 0.4987630844116211f, 0.3506373167037964f, - 0.48065757751464844f, 0.269855260848999f, -0.2463444471359253f, 0.19044137001037598f, - -0.11830493807792664f, -0.2576887905597687f, -0.33940935134887695f, -0.257951021194458f, - -0.08279827237129211f, 0.3513314127922058f, -0.29122066497802734f, -0.43358397483825684f, - -0.13429927825927734f, 0.44032156467437744f, 0.05308258533477783f, -0.3499870300292969f, - -0.28474611043930054f, -0.44209951162338257f, -0.07418054342269897f, -0.10919415950775146f, - 0.2845439314842224f, 0.3498746156692505f, -0.19313520193099976f, 0.32609254121780396f, - 0.4880145788192749f, 0.05574071407318115f, -0.46457427740097046f, -0.02524462342262268f, - -0.18780940771102905f, -0.14720159769058228f, 0.207585871219635f, 0.47157740592956543f, - -0.05567386746406555f, -0.49871665239334106f, 0.2274145483970642f, 0.4589425325393677f, - -0.4725189805030823f, -0.4358765780925751f, 0.2841453552246094f, -0.27037882804870605f, - 0.34227508306503296f, 0.33575427532196045f, -0.19485199451446533f, -0.27679920196533203f, - -0.4238079786300659f, -0.4385119676589966f, 0.43724071979522705f, 0.3065117597579956f, - 
0.45696544647216797f, 0.05291992425918579f, -0.023618370294570923f, -0.1860884726047516f, - 0.08669537305831909f, 0.32541000843048096f, 0.1846179962158203f, -0.1984834372997284f, - -0.2754465937614441f, 0.32004624605178833f, -0.34846532344818115f, 0.0999596118927002f, - -0.11374691128730774f, 0.21225297451019287f, -0.02315312623977661f, 0.1671370267868042f, - 0.22319108247756958f, 0.03609824180603027f, -0.1587022840976715f, 0.059984564781188965f, - -0.03951650857925415f, -0.4841443598270416f, 0.32919085025787354f, -0.23115816712379456f, - 0.39441078901290894f, -0.3554944396018982f, -0.17022761702537537f, -0.055081307888031006f, - 0.15856128931045532f, -0.4183449149131775f, -0.2474445104598999f, 0.03603637218475342f, - -0.2836887538433075f, 0.4602506160736084f, 0.29092925786972046f, -0.199321448802948f, - 0.380856454372406f, -0.13847029209136963f, -0.238397479057312f, -0.1907123327255249f, - -0.11061936616897583f, -0.08717870712280273f, 0.24449139833450317f, -0.14727482199668884f, - 0.1437196135520935f, 0.3955056071281433f, -0.12538021802902222f, 0.11590522527694702f, - 0.4598066806793213f, -0.30005723237991333f, -0.46578651666641235f, -0.33955082297325134f, - -0.2671887278556824f, 0.3611910939216614f, -0.11423084139823914f, -0.08382436633110046f, - -0.31819307804107666f, 0.14515334367752075f, 0.3157258629798889f, 0.33179205656051636f, - -0.2558857202529907f, 0.11888682842254639f, 0.12824326753616333f, -0.33106181025505066f, - 0.2549159526824951f, -0.46760573983192444f, -0.11983257532119751f, 0.1834418773651123f}; + vector X = { + MLFloat16(0.468017578f), MLFloat16(-0.461425781f), MLFloat16(0.335205078f), MLFloat16(-0.401123047f), + MLFloat16(0.417236328f), MLFloat16(-0.0481262207f), MLFloat16(0.204101562f), MLFloat16(0.0318908691f), + MLFloat16(-0.0477905273f), MLFloat16(-0.0795288086f), MLFloat16(0.498779297f), MLFloat16(0.350585938f), + MLFloat16(0.480712891f), MLFloat16(0.269775391f), MLFloat16(-0.246337891f), MLFloat16(0.190429688f), + MLFloat16(-0.118286133f), MLFloat16(-0.257568359f), MLFloat16(-0.339355469f), MLFloat16(-0.258056641f), + MLFloat16(-0.0828247070f), MLFloat16(0.351318359f), MLFloat16(-0.291259766f), MLFloat16(-0.433593750f), + MLFloat16(-0.134277344f), MLFloat16(0.440429688f), MLFloat16(0.0530700684f), MLFloat16(-0.350097656f), + MLFloat16(-0.284667969f), MLFloat16(-0.442138672f), MLFloat16(-0.0741577148f), MLFloat16(-0.109191895f), + MLFloat16(0.284423828f), MLFloat16(0.349853516f), MLFloat16(-0.193115234f), MLFloat16(0.326171875f), + MLFloat16(0.488037109f), MLFloat16(0.0557556152f), MLFloat16(-0.464599609f), MLFloat16(-0.0252380371f), + MLFloat16(-0.187866211f), MLFloat16(-0.147216797f), MLFloat16(0.207641602f), MLFloat16(0.471679688f), + MLFloat16(-0.0556640625f), MLFloat16(-0.498779297f), MLFloat16(0.227416992f), MLFloat16(0.458984375f), + MLFloat16(-0.472412109f), MLFloat16(-0.435791016f), MLFloat16(0.284179688f), MLFloat16(-0.270263672f), + MLFloat16(0.342285156f), MLFloat16(0.335693359f), MLFloat16(-0.194824219f), MLFloat16(-0.276855469f), + MLFloat16(-0.423828125f), MLFloat16(-0.438476562f), MLFloat16(0.437255859f), MLFloat16(0.306396484f), + MLFloat16(0.457031250f), MLFloat16(0.0529174805f), MLFloat16(-0.0236206055f), MLFloat16(-0.186035156f), + MLFloat16(0.0866699219f), MLFloat16(0.325439453f), MLFloat16(0.184570312f), MLFloat16(-0.198486328f), + MLFloat16(-0.275390625f), MLFloat16(0.320068359f), MLFloat16(-0.348388672f), MLFloat16(0.0999755859f), + MLFloat16(-0.113769531f), MLFloat16(0.212280273f), MLFloat16(-0.0231475830f), MLFloat16(0.167114258f), + 
MLFloat16(0.223144531f), MLFloat16(0.0361022949f), MLFloat16(-0.158691406f), MLFloat16(0.0599975586f), + MLFloat16(-0.0395202637f), MLFloat16(-0.484130859f), MLFloat16(0.329101562f), MLFloat16(-0.231201172f), + MLFloat16(0.394531250f), MLFloat16(-0.355468750f), MLFloat16(-0.170288086f), MLFloat16(-0.0550842285f), + MLFloat16(0.158569336f), MLFloat16(-0.418457031f), MLFloat16(-0.247436523f), MLFloat16(0.0360412598f), + MLFloat16(-0.283691406f), MLFloat16(0.460205078f), MLFloat16(0.291015625f), MLFloat16(-0.199340820f), + MLFloat16(0.380859375f), MLFloat16(-0.138427734f), MLFloat16(-0.238403320f), MLFloat16(-0.190673828f), + MLFloat16(-0.110595703f), MLFloat16(-0.0871582031f), MLFloat16(0.244506836f), MLFloat16(-0.147216797f), + MLFloat16(0.143676758f), MLFloat16(0.395507812f), MLFloat16(-0.125366211f), MLFloat16(0.115905762f), + MLFloat16(0.459716797f), MLFloat16(-0.300048828f), MLFloat16(-0.465820312f), MLFloat16(-0.339599609f), + MLFloat16(-0.267089844f), MLFloat16(0.361083984f), MLFloat16(-0.114257812f), MLFloat16(-0.0838012695f), + MLFloat16(-0.318115234f), MLFloat16(0.145141602f), MLFloat16(0.315673828f), MLFloat16(0.331787109f), + MLFloat16(-0.255859375f), MLFloat16(0.118896484f), MLFloat16(0.128295898f), MLFloat16(-0.331054688f), + MLFloat16(0.254882812f), MLFloat16(-0.467529297f), MLFloat16(-0.119812012f), MLFloat16(0.183471680f)}; vector X_shape = {2, 1, 4, 4, 4}; - vector W = {0.388077974319458f, -0.16366064548492432f, -0.42871910333633423f, 0.4276432394981384f, - 0.21517693996429443f, 0.007908165454864502f, 0.33897721767425537f, 0.21843165159225464f, - 0.34095364809036255f, -0.17043980956077576f, -0.013571739196777344f, -0.26793742179870605f, - -0.34863436222076416f, -0.2672275900840759f, -0.36691007018089294f, 0.37296557426452637f}; + vector W = { + MLFloat16(0.388183594f), MLFloat16(-0.163696289f), + MLFloat16(-0.428710938f), MLFloat16(0.427734375f), + MLFloat16(0.215209961f), MLFloat16(0.00791168213f), + MLFloat16(0.338867188f), MLFloat16(0.218383789f), + MLFloat16(0.341064453f), MLFloat16(-0.170410156f), + MLFloat16(-0.0135726929f), MLFloat16(-0.267822266f), + MLFloat16(-0.348632812f), MLFloat16(-0.267333984f), + MLFloat16(-0.366943359f), MLFloat16(0.373046875f)}; vector W_shape = {2, 1, 2, 2, 2}; - vector B = {0.4310183525085449f, -0.4564093053340912f}; + vector B = {MLFloat16(0.430908203f), MLFloat16(-0.456298828f)}; vector B_shape = {2}; vector Y_shape = {2, 2, 3, 3, 3}; - auto expected_vals = {0.5332361459732056f, 0.6628494262695312f, 0.544619083404541f, 0.4242798388004303f, - 0.6271085739135742f, 0.6721994876861572f, 0.43064039945602417f, 0.4246789515018463f, - 0.53834068775177f, 0.6932926177978516f, 0.42797625064849854f, 0.2218741625547409f, - 0.29522019624710083f, 0.8329390287399292f, 0.37605351209640503f, 0.43735477328300476f, - 0.2920728623867035f, 0.6692450046539307f, 0.5527016520500183f, 0.22643595933914185f, - 0.5138190984725952f, 0.3041342794895172f, 0.7423423528671265f, 0.26707080006599426f, - 0.4617553651332855f, 0.32416003942489624f, 0.511577844619751f, -0.28187549114227295f, - -0.5031181573867798f, -0.5793710947036743f, -0.5992864370346069f, -0.5055556893348694f, - -0.7562476396560669f, -0.44363799691200256f, -0.5730307102203369f, -0.6302952766418457f, - -0.4756688177585602f, -0.728988528251648f, -0.3900943398475647f, -0.6694478988647461f, - -0.38822290301322937f, -0.35774707794189453f, -0.39807581901550293f, -0.547709047794342f, - -0.35872578620910645f, -0.5326492786407471f, -0.40852290391921997f, -0.4537881314754486f, - -0.4545857608318329f, 
-0.379546195268631f, -0.5250767469406128f, -0.42439910769462585f, - -0.5558245182037354f, -0.38563215732574463f, 0.44995537400245667f, 0.5007325410842896f, - 0.49359965324401855f, 0.40685802698135376f, 0.407518208026886f, 0.4628955125808716f, - 0.4301188290119171f, 0.40635955333709717f, 0.4260363280773163f, 0.55128413438797f, - 0.5498291254043579f, 0.27105778455734253f, 0.40259143710136414f, 0.5747092962265015f, - 0.4187920391559601f, 0.4507707953453064f, 0.420598566532135f, 0.3950541913509369f, - 0.593889057636261f, 0.16578882932662964f, 0.5332239270210266f, 0.43014785647392273f, - 0.50260329246521f, 0.39225444197654724f, 0.4074971079826355f, 0.5073125958442688f, - 0.3823610544204712f, -0.4240749180316925f, -0.41936254501342773f, -0.5241475105285645f, - -0.5220003724098206f, -0.502869725227356f, -0.5122783780097961f, -0.4260129928588867f, - -0.4105660617351532f, -0.4483373165130615f, -0.33759188652038574f, -0.735706090927124f, - -0.3714444637298584f, -0.4888814687728882f, -0.6191370487213135f, -0.2640320658683777f, - -0.47542816400527954f, -0.5078460574150085f, -0.4205915927886963f, -0.5584549903869629f, - -0.39770257472991943f, -0.45317384600639343f, -0.5598302483558655f, -0.2542789578437805f, - -0.5359901785850525f, -0.48090484738349915f, -0.38603779673576355f, -0.4991581439971924f}; - - // For the CUDA EP: Due to CUDNN Frontend using TF32 for FP32 operations we get a higher error than using FP32 only, - // as TF32 has a 10 bit mantissa. - float epsilon = 2.1e-4f; - - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, false, epsilon); + auto expected_vals = { + MLFloat16(0.533115625f), MLFloat16(0.662707329f), MLFloat16(0.544498205f), + MLFloat16(0.424174339f), MLFloat16(0.627012968f), MLFloat16(0.672067642f), + MLFloat16(0.430530101f), MLFloat16(0.424569398f), MLFloat16(0.538250446f), + MLFloat16(0.693208933f), MLFloat16(0.427851349f), MLFloat16(0.221761703f), + MLFloat16(0.295077145f), MLFloat16(0.832913339f), MLFloat16(0.375999779f), + MLFloat16(0.437245011f), MLFloat16(0.291920483f), MLFloat16(0.669212699f), + MLFloat16(0.552566051f), MLFloat16(0.226370573f), MLFloat16(0.513698816f), + MLFloat16(0.303992242f), MLFloat16(0.742284894f), MLFloat16(0.266925812f), + MLFloat16(0.461661220f), MLFloat16(0.323991477f), MLFloat16(0.511511266f), + MLFloat16(-0.281706333f), MLFloat16(-0.502987564f), MLFloat16(-0.579300106f), + MLFloat16(-0.599243939f), MLFloat16(-0.505472362f), MLFloat16(-0.756186068f), + MLFloat16(-0.443522811f), MLFloat16(-0.572978139f), MLFloat16(-0.630189657f), + MLFloat16(-0.475540936f), MLFloat16(-0.728834927f), MLFloat16(-0.389986098f), + MLFloat16(-0.669373453f), MLFloat16(-0.387869477f), MLFloat16(-0.357608467f), + MLFloat16(-0.397931814f), MLFloat16(-0.547608852f), MLFloat16(-0.358573616f), + MLFloat16(-0.532473862f), MLFloat16(-0.408438683f), MLFloat16(-0.453677744f), + MLFloat16(-0.454452783f), MLFloat16(-0.379444361f), MLFloat16(-0.524981856f), + MLFloat16(-0.424284518f), MLFloat16(-0.555757523f), MLFloat16(-0.385479659f), + MLFloat16(0.449835509f), MLFloat16(0.500584960f), MLFloat16(0.493453026f), + MLFloat16(0.406748474f), MLFloat16(0.407412887f), MLFloat16(0.462785602f), + MLFloat16(0.430008084f), MLFloat16(0.406240731f), MLFloat16(0.425926626f), + MLFloat16(0.551153421f), MLFloat16(0.549696267f), MLFloat16(0.270993829f), + MLFloat16(0.402447432f), MLFloat16(0.574599743f), MLFloat16(0.418689728f), + MLFloat16(0.450668573f), MLFloat16(0.420462728f), MLFloat16(0.394942641f), + MLFloat16(0.593814850f), MLFloat16(0.165656328f), 
MLFloat16(0.533114314f), + MLFloat16(0.430018425f), MLFloat16(0.502558053f), MLFloat16(0.392109811f), + MLFloat16(0.407388866f), MLFloat16(0.507203162f), MLFloat16(0.382243097f), + MLFloat16(-0.423966885f), MLFloat16(-0.419248402f), MLFloat16(-0.524025679f), + MLFloat16(-0.521910012f), MLFloat16(-0.502744913f), MLFloat16(-0.512152255f), + MLFloat16(-0.425884366f), MLFloat16(-0.410446912f), MLFloat16(-0.448228836f), + MLFloat16(-0.337432563f), MLFloat16(-0.735596657f), MLFloat16(-0.371323436f), + MLFloat16(-0.488816738f), MLFloat16(-0.618983328f), MLFloat16(-0.263916761f), + MLFloat16(-0.475321025f), MLFloat16(-0.507732749f), MLFloat16(-0.420486867f), + MLFloat16(-0.558301449f), MLFloat16(-0.397618413f), MLFloat16(-0.453063041f), + MLFloat16(-0.559680939f), MLFloat16(-0.254149109f), MLFloat16(-0.535908163f), + MLFloat16(-0.480782807f), MLFloat16(-0.385932118f), MLFloat16(-0.499056786f)}; + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Conv2D_group) { +TEST(ConvFp16Test, Conv2D_group) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -660,20 +704,28 @@ TEST(ConvTest, Conv2D_group) { {} // excluded EPs }; - vector X = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f}; + vector X = { + MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), + MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f), + MLFloat16(8.0f), MLFloat16(9.0f), MLFloat16(10.0f), MLFloat16(11.0f), + MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f), MLFloat16(15.0f), + MLFloat16(16.0f), MLFloat16(17.0f)}; vector X_shape = {1, 2, 3, 3}; - vector W = {1.0f, 2.0f}; + vector W = {MLFloat16(1.0f), MLFloat16(2.0f)}; vector W_shape = {2, 1, 1, 1}; vector Y_shape = {1, 2, 3, 3}; - auto expected_vals = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 18.0f, 20.0f, 22.0f, 24.0f, 26.0f, 28.0f, 30.0f, 32.0f, 34.0f}; - - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); - - // NNAPI/CoreML EP requires weight to be an initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + auto expected_vals = { + MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), + MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f), + MLFloat16(8.0f), MLFloat16(18.0f), MLFloat16(20.0f), MLFloat16(22.0f), + MLFloat16(24.0f), MLFloat16(26.0f), MLFloat16(28.0f), MLFloat16(30.0f), + MLFloat16(32.0f), MLFloat16(34.0f)}; + + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Depthwise2D_Bias_Group1_Issue18992) { +TEST(ConvFp16Test, Depthwise2D_Bias_Group1_Issue18992) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -684,20 +736,20 @@ TEST(ConvTest, Depthwise2D_Bias_Group1_Issue18992) { {} // excluded EPs }; - vector X = {1.0f}; + vector X = {MLFloat16(1.0f)}; vector X_shape = {1, 1, 1, 1}; - vector W = {0.5f}; + vector W = {MLFloat16(0.5f)}; vector W_shape = {1, 1, 1, 1}; - vector B = {0.5f}; + vector B = {MLFloat16(0.5f)}; vector B_shape = {1}; vector Y_shape = {1, 1, 1, 1}; - auto expected_vals = {1.0f}; + auto expected_vals = {MLFloat16(1.0f)}; - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvOp(attrs, 
{X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Depthwise2D_Bias_Group2) { +TEST(ConvFp16Test, Depthwise2D_Bias_Group2) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -708,34 +760,34 @@ TEST(ConvTest, Depthwise2D_Bias_Group2) { {} // excluded EPs }; - vector X = { - 0.0f, 1.0f, 2.0f, - 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, + vector X = { + MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), + MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), + MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f), - 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, - 15.0f, 16.0f, 17.0f}; + MLFloat16(9.0f), MLFloat16(10.0f), MLFloat16(11.0f), + MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f), + MLFloat16(15.0f), MLFloat16(16.0f), MLFloat16(17.0f)}; vector X_shape = {1, 2, 3, 3}; - vector W = {1.0f, 2.0f}; + vector W = {MLFloat16(1.0f), MLFloat16(2.0f)}; vector W_shape = {2, 1, 1, 1}; - vector B = {1.0f, -1.0f}; + vector B = {MLFloat16(1.0f), MLFloat16(-1.0f)}; vector B_shape = {2}; vector Y_shape = {1, 2, 3, 3}; auto expected_vals = { - 1.0f, 2.0f, 3.0f, - 4.0f, 5.0f, 6.0f, - 7.0f, 8.0f, 9.0f, + MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), + MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), + MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f), - 17.0f, 19.0f, 21.0f, - 23.0f, 25.0f, 27.0f, - 29.0f, 31.0f, 33.0f}; + MLFloat16(17.0f), MLFloat16(19.0f), MLFloat16(21.0f), + MLFloat16(23.0f), MLFloat16(25.0f), MLFloat16(27.0f), + MLFloat16(29.0f), MLFloat16(31.0f), MLFloat16(33.0f)}; - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Depthwise2D_Bias_Group15) { +TEST(ConvFp16Test, Depthwise2D_Bias_Group15) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -746,169 +798,169 @@ TEST(ConvTest, Depthwise2D_Bias_Group15) { {} // excluded EPs }; - vector X = { + vector X = { // C = 0 - 0.0f, 1.0f, - 2.0f, 3.0f, + MLFloat16(0.0f), MLFloat16(1.0f), + MLFloat16(2.0f), MLFloat16(3.0f), // C = 1 - 4.0f, 5.0f, - 6.0f, 7.0f, + MLFloat16(4.0f), MLFloat16(5.0f), + MLFloat16(6.0f), MLFloat16(7.0f), // C = 2 - 8.0f, 9.0f, - 10.0f, 11.0f, + MLFloat16(8.0f), MLFloat16(9.0f), + MLFloat16(10.0f), MLFloat16(11.0f), // C = 3 - 12.0f, 13.0f, - 14.0f, 15.0f, + MLFloat16(12.0f), MLFloat16(13.0f), + MLFloat16(14.0f), MLFloat16(15.0f), // C = 4 - 16.0f, 17.0f, - 18.0f, 19.0f, + MLFloat16(16.0f), MLFloat16(17.0f), + MLFloat16(18.0f), MLFloat16(19.0f), // C = 5 - 20.0f, 21.0f, - 22.0f, 23.0f, + MLFloat16(20.0f), MLFloat16(21.0f), + MLFloat16(22.0f), MLFloat16(23.0f), // C = 6 - 24.0f, 25.0f, - 26.0f, 27.0f, + MLFloat16(24.0f), MLFloat16(25.0f), + MLFloat16(26.0f), MLFloat16(27.0f), // C = 7 - 28.0f, 29.0f, - 30.0f, 31.0f, + MLFloat16(28.0f), MLFloat16(29.0f), + MLFloat16(30.0f), MLFloat16(31.0f), // C = 8 - 32.0f, 33.0f, - 34.0f, 35.0f, + MLFloat16(32.0f), MLFloat16(33.0f), + MLFloat16(34.0f), MLFloat16(35.0f), // C = 9 - 36.0f, 37.0f, - 38.0f, 39.0f, + MLFloat16(36.0f), MLFloat16(37.0f), + MLFloat16(38.0f), MLFloat16(39.0f), // C = 
10 - 40.0f, 41.0f, - 42.0f, 43.0f, + MLFloat16(40.0f), MLFloat16(41.0f), + MLFloat16(42.0f), MLFloat16(43.0f), // C = 11 - 44.0f, 45.0f, - 46.0f, 47.0f, + MLFloat16(44.0f), MLFloat16(45.0f), + MLFloat16(46.0f), MLFloat16(47.0f), // C = 12 - 48.0f, 49.0f, - 50.0f, 51.0f, + MLFloat16(48.0f), MLFloat16(49.0f), + MLFloat16(50.0f), MLFloat16(51.0f), // C = 13 - 52.0f, 53.0f, - 54.0f, 55.0f, + MLFloat16(52.0f), MLFloat16(53.0f), + MLFloat16(54.0f), MLFloat16(55.0f), // C = 14 - 56.0f, 57.0f, - 58.0f, 59.0f}; + MLFloat16(56.0f), MLFloat16(57.0f), + MLFloat16(58.0f), MLFloat16(59.0f)}; vector X_shape = {1, 15, 2, 2}; - vector W = { + vector W = { // M = 0 - 0.0f, 1.0f, - 2.0f, 3.0f, + MLFloat16(0.0f), MLFloat16(1.0f), + MLFloat16(2.0f), MLFloat16(3.0f), // M = 1 - 4.0f, 5.0f, - 6.0f, 7.0f, + MLFloat16(4.0f), MLFloat16(5.0f), + MLFloat16(6.0f), MLFloat16(7.0f), // M = 2 - 8.0f, 9.0f, - 10.0f, 11.0f, + MLFloat16(8.0f), MLFloat16(9.0f), + MLFloat16(10.0f), MLFloat16(11.0f), // M = 3 - 12.0f, 13.0f, - 14.0f, 15.0f, + MLFloat16(12.0f), MLFloat16(13.0f), + MLFloat16(14.0f), MLFloat16(15.0f), // M = 4 - 16.0f, 17.0f, - 18.0f, 19.0f, + MLFloat16(16.0f), MLFloat16(17.0f), + MLFloat16(18.0f), MLFloat16(19.0f), // M = 5 - 20.0f, 21.0f, - 22.0f, 23.0f, + MLFloat16(20.0f), MLFloat16(21.0f), + MLFloat16(22.0f), MLFloat16(23.0f), // M = 6 - 24.0f, 25.0f, - 26.0f, 27.0f, + MLFloat16(24.0f), MLFloat16(25.0f), + MLFloat16(26.0f), MLFloat16(27.0f), // M = 7 - 28.0f, 29.0f, - 30.0f, 31.0f, + MLFloat16(28.0f), MLFloat16(29.0f), + MLFloat16(30.0f), MLFloat16(31.0f), // M = 8 - 32.0f, 33.0f, - 34.0f, 35.0f, + MLFloat16(32.0f), MLFloat16(33.0f), + MLFloat16(34.0f), MLFloat16(35.0f), // M = 9 - 36.0f, 37.0f, - 38.0f, 39.0f, + MLFloat16(36.0f), MLFloat16(37.0f), + MLFloat16(38.0f), MLFloat16(39.0f), // M = 10 - 40.0f, 41.0f, - 42.0f, 43.0f, + MLFloat16(40.0f), MLFloat16(41.0f), + MLFloat16(42.0f), MLFloat16(43.0f), // M = 11 - 44.0f, 45.0f, - 46.0f, 47.0f, + MLFloat16(44.0f), MLFloat16(45.0f), + MLFloat16(46.0f), MLFloat16(47.0f), // M = 12 - 48.0f, 49.0f, - 50.0f, 51.0f, + MLFloat16(48.0f), MLFloat16(49.0f), + MLFloat16(50.0f), MLFloat16(51.0f), // M = 13 - 52.0f, 53.0f, - 54.0f, 55.0f, + MLFloat16(52.0f), MLFloat16(53.0f), + MLFloat16(54.0f), MLFloat16(55.0f), // M = 14 - 56.0f, 57.0f, - 58.0f, 59.0f}; + MLFloat16(56.0f), MLFloat16(57.0f), + MLFloat16(58.0f), MLFloat16(59.0f)}; vector W_shape = {15, 1, 2, 2}; - vector B = { - 101.0f, - 102.0f, - 103.0f, - 104.0f, - 105.0f, - 106.0f, - 107.0f, - 108.0f, - 109.0f, - 110.0f, - 111.0f, - 112.0f, - 113.0f, - 114.0f, - 115.0f}; + vector B = { + MLFloat16(101.0f), + MLFloat16(102.0f), + MLFloat16(103.0f), + MLFloat16(104.0f), + MLFloat16(105.0f), + MLFloat16(106.0f), + MLFloat16(107.0f), + MLFloat16(108.0f), + MLFloat16(109.0f), + MLFloat16(110.0f), + MLFloat16(111.0f), + MLFloat16(112.0f), + MLFloat16(113.0f), + MLFloat16(114.0f), + MLFloat16(115.0f)}; vector B_shape = {15}; vector Y_shape = {1, 15, 1, 1}; auto expected_vals = { - 115.0f, // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 101.0 - 228.0f, - 469.0f, - 838.0f, - 1335.0f, - 1960.0f, - 2713.0f, // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 107.0 - 3594.0f, - 4603.0f, - 5740.0f, - 7005.0f, - 8398.0f, - 9919.0f, // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 113.0 - 11568.0f, // 52.0*52.0 + 53.0*53.0 + 54.0*54.0 + 55.0*55.0 + 114.0 - 13345.0f // 56.0*56.0 + 57.0*57.0 + 58.0*58.0 + 59.0*59.0 + 115.0 + MLFloat16(115.0f), // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 101.0 + MLFloat16(228.0f), + MLFloat16(469.0f), + 
MLFloat16(838.0f), + MLFloat16(1335.0f), + MLFloat16(1960.0f), + MLFloat16(2713.0f), // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 107.0 + MLFloat16(3594.0f), + MLFloat16(4603.0f), + MLFloat16(5740.0f), + MLFloat16(7005.0f), + MLFloat16(8398.0f), + MLFloat16(9919.0f), // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 113.0 + MLFloat16(11568.0f), // 52.0*52.0 + 53.0*53.0 + 54.0*54.0 + 55.0*55.0 + 114.0 + MLFloat16(13345.0f) // 56.0*56.0 + 57.0*57.0 + 58.0*58.0 + 59.0*59.0 + 115.0 }; - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, ConvDimWithZero) { +TEST(ConvFp16Test, ConvDimWithZero) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1, 1}, // dilations @@ -919,20 +971,16 @@ TEST(ConvTest, ConvDimWithZero) { {} // excluded EPs }; - vector X = vector(); + vector X; vector X_shape = {0, 2, 4, 4}; // N of 0 should be handled - vector W = {1.0f, 2.0f, 1.0f, 2.0f}; + vector W = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(1.0f), MLFloat16(2.0f)}; vector W_shape = {2, 2, 1, 1}; vector out_shape = {0, 2, 4, 4}; - // not handled by ACL - attrs.excluded_providers.insert(kAclExecutionProvider); - - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, {}, out_shape, false, optional(), - OpTester::ExpectResult::kExpectSuccess, "", 10); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, {}, out_shape); } -TEST(ConvTest, Conv1D_asymmetric_padding) { +TEST(ConvFp16Test, Conv1D_asymmetric_padding) { ConvOpAndTestAttributes attrs = { "", // auto_pad vector{1}, // dilations @@ -943,21 +991,20 @@ TEST(ConvTest, Conv1D_asymmetric_padding) { {} // excluded EPs }; - vector X = {1.f, 2.f, 3.f}; + vector X = {MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f)}; vector X_shape = {1, 1, 3}; - vector W = {1.f, 1.f, 1.f}; + vector W = {MLFloat16(1.f), MLFloat16(1.f), MLFloat16(1.f)}; vector W_shape = {1, 1, 3}; - vector B = {0.f}; + vector B = {MLFloat16()}; vector B_shape = {1}; vector Y_shape = {1, 1, 2}; - auto expected_vals = {3.f, 6.f}; - - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + auto expected_vals = {MLFloat16(3.f), MLFloat16(6.f)}; - TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); } -TEST(ConvTest, Conv_AutoPad_with_non_default_strides) { +TEST(ConvFp16Test, Conv_AutoPad_with_non_default_strides) { ConvOpAndTestAttributes attrs = { "SAME_LOWER", // auto_pad vector{1, 1}, // dilations @@ -968,29 +1015,312 @@ TEST(ConvTest, Conv_AutoPad_with_non_default_strides) { {} // excluded EPs }; - vector X = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, - 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, - 15.0f, 16.0f, 17.0f, 18.0f, - 19.0f, 20.0f, 21.0, 22.0f, 23.0f, 24.0f}; + vector X = { + MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), MLFloat16(4.0f), + MLFloat16(5.0f), MLFloat16(6.0f), MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f), + MLFloat16(10.0f), MLFloat16(11.0f), MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f), + MLFloat16(15.0f), MLFloat16(16.0f), MLFloat16(17.0f), MLFloat16(18.0f), 
MLFloat16(19.0f), + MLFloat16(20.0f), MLFloat16(21.0f), MLFloat16(22.0f), MLFloat16(23.0f), MLFloat16(24.0f)}; vector X_shape = {1, 1, 5, 5}; - vector W = {1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f}; + vector W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), + MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), + MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)}; vector W_shape = {1, 1, 3, 3}; - auto expected_vals = {12.0f, 27.0f, 24.0f, - 63.0f, 108.0f, 81.0f, - 72.0f, 117.0f, 84.0f}; + auto expected_vals = {MLFloat16(12.0f), MLFloat16(27.0f), MLFloat16(24.0f), + MLFloat16(63.0f), MLFloat16(108.0f), MLFloat16(81.0f), + MLFloat16(72.0f), MLFloat16(117.0f), MLFloat16(84.0f)}; vector Y_shape = {1, 1, 3, 3}; - // Test with weight as initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); +} + +TEST(ConvFp16Test, Pointwise_2D) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{1, 1}, // kernel_shape + vector{0, 0, 0, 0}, // pads + vector{1, 1}, // strides + {} // excluded EPs + }; + vector X = { + MLFloat16(-9.f), MLFloat16(1.f), MLFloat16(2.f), + MLFloat16(-5.f), MLFloat16(3.f), MLFloat16(-2.f), + MLFloat16(5.f), MLFloat16(-3.f), MLFloat16(1.f), + MLFloat16(1.f), MLFloat16(8.f), MLFloat16(-4.f), + MLFloat16(-1.f), MLFloat16(6.f), MLFloat16(7.f), + MLFloat16(-1.f), MLFloat16(4.f), MLFloat16(-5.f), + MLFloat16(-9.f), MLFloat16(1.f), MLFloat16(2.f), + MLFloat16(-5.f), MLFloat16(3.f), MLFloat16(-2.f), + MLFloat16(5.f), MLFloat16(-3.f), MLFloat16(1.f)}; + vector X_shape = {1, 3, 3, 3}; + vector W = {MLFloat16(2.f), MLFloat16(-3.f), MLFloat16(0.5f), + MLFloat16(0.25f), MLFloat16(-2.f), MLFloat16(-0.75f)}; + vector W_shape = {2, 3, 1, 1}; + vector Y_shape = {1, 2, 3, 3}; + auto expected_vals = { + MLFloat16(-25.5f), MLFloat16(-21.5f), MLFloat16(17.f), + MLFloat16(-9.5f), MLFloat16(-10.5f), MLFloat16(-26.f), + MLFloat16(15.5f), MLFloat16(-19.5f), MLFloat16(17.5f), + MLFloat16(2.5f), MLFloat16(-16.5f), MLFloat16(7.f), + MLFloat16(4.5f), MLFloat16(-13.5f), MLFloat16(-13.f), + MLFloat16(-0.5f), MLFloat16(-6.5f), MLFloat16(9.5f)}; + + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); +} + +TEST(ConvFp16Test, Pointwise_3D) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1, 1}, // dilations + 1, // group + vector{1, 1, 1}, // kernel_shape + vector{0, 0, 0, 0, 0, 0}, // pads + vector{1, 1, 1}, // strides + {} // excluded EPs + }; + + vector X = { + MLFloat16(2 / 16.f), MLFloat16(3 / 16.f), MLFloat16(4 / 16.f), + MLFloat16(5 / 16.f), MLFloat16(6 / 16.f), MLFloat16(7 / 16.f), + MLFloat16(8 / 16.f), MLFloat16(9 / 16.f), MLFloat16(10 / 16.f), + MLFloat16(11 / 16.f), MLFloat16(12 / 16.f), MLFloat16(13 / 16.f), + MLFloat16(14 / 16.f), MLFloat16(15 / 16.f), MLFloat16(16 / 16.f), + MLFloat16(17 / 16.f), MLFloat16(18 / 16.f), MLFloat16(19 / 16.f), + MLFloat16(20 / 16.f), MLFloat16(21 / 16.f), MLFloat16(22 / 16.f), + MLFloat16(23 / 16.f), MLFloat16(24 / 16.f), MLFloat16(25 / 16.f), + MLFloat16(26 / 16.f), MLFloat16(27 / 16.f), MLFloat16(28 / 16.f)}; + vector X_shape = {1, 1, 3, 3, 3}; + + vector W = {MLFloat16(0.5f)}; + vector W_shape = {1, 1, 1, 1, 1}; + + auto expected_vals = { + MLFloat16(0.0625f), MLFloat16(0.09375f), 
MLFloat16(0.125f), + MLFloat16(0.15625f), MLFloat16(0.1875f), MLFloat16(0.21875f), + MLFloat16(0.25f), MLFloat16(0.28125f), MLFloat16(0.3125f), + MLFloat16(0.34375f), MLFloat16(0.375f), MLFloat16(0.40625f), + MLFloat16(0.4375f), MLFloat16(0.46875f), MLFloat16(0.5f), + MLFloat16(0.53125f), MLFloat16(0.5625f), MLFloat16(0.59375f), + MLFloat16(0.625f), MLFloat16(0.65625f), MLFloat16(0.6875f), + MLFloat16(0.71875f), MLFloat16(0.75f), MLFloat16(0.78125f), + MLFloat16(0.8125f), MLFloat16(0.84375f), MLFloat16(0.875f)}; + vector Y_shape = {1, 1, 3, 3, 3}; // Test with weight as initializer - TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); } +#ifndef DISABLE_CONTRIB_OPS + +TEST(ConvFp16Test, Pointwise_Relu) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{1, 1}, // kernel_shape + vector{0, 0, 0, 0}, // pads + vector{1, 1}, // strides + {}, // excluded EPs + "Relu" // activation + }; + + vector X = { + MLFloat16(-9.f), MLFloat16(1.f), MLFloat16(-9.f), + MLFloat16(1.f), MLFloat16(8.f), MLFloat16(1.f), + MLFloat16(2.f), MLFloat16(-4.f), MLFloat16(2.f), + MLFloat16(-5.f), MLFloat16(-1.f), MLFloat16(-5.f), + MLFloat16(3.f), MLFloat16(6.f), MLFloat16(3.f), + MLFloat16(-2.f), MLFloat16(7.f), MLFloat16(-2.f), + MLFloat16(5.f), MLFloat16(-1.f), MLFloat16(5.f), + MLFloat16(-3.f), MLFloat16(4.f), MLFloat16(-3.f), + MLFloat16(1.f), MLFloat16(-5.f), MLFloat16(1.f)}; + vector X_shape = {1, 3, 3, 3}; + vector W = {MLFloat16(2.f), MLFloat16(-3.f), MLFloat16(0.5f), + MLFloat16(0.25f), MLFloat16(-2.f), MLFloat16(-0.75f)}; + vector W_shape = {2, 3, 1, 1}; + vector Y_shape = {1, 3, 3, 2}; + auto expected_vals = { + MLFloat16(0.f), MLFloat16(2.5f), + MLFloat16(0.f), MLFloat16(0.f), + MLFloat16(17.f), MLFloat16(7.f), + MLFloat16(0.f), MLFloat16(4.5f), + MLFloat16(0.f), MLFloat16(0.f), + MLFloat16(0.f), MLFloat16(0.f), + MLFloat16(15.5f), MLFloat16(0.f), + MLFloat16(0.f), MLFloat16(0.f), + MLFloat16(17.5f), MLFloat16(9.5f)}; + + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); +} + +TEST(ConvFp16Test, Conv2D_HardSigmoid) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + vector{1, 1}, // dilations + 1, // group + vector{2, 2}, // kernel_shape + vector{0, 0, 0, 0}, // pads + vector{1, 1}, // strides + {}, // excluded EPs + "HardSigmoid", // activation + vector{0.2f, 0.5f} // activation_parameters + }; + + vector X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), + MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), + MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)}; + vector X_shape = {1, 3, 3, 1}; + vector W = {MLFloat16(0.125f), MLFloat16(0.125f), MLFloat16(0.125f), MLFloat16(0.125f), + MLFloat16(-0.125f), MLFloat16(-0.125f), MLFloat16(-0.125f), MLFloat16(-0.125f)}; + vector W_shape = {2, 1, 2, 2}; + vector Y_shape = {1, 2, 2, 2}; + auto expected_vals = { + MLFloat16(0.8f), MLFloat16(0.2f), + MLFloat16(0.9f), MLFloat16(0.1f), + MLFloat16(1.0f), MLFloat16(0.0f), + MLFloat16(1.0f), MLFloat16(0.0f)}; + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape); + TestConvFp16Op(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, true); +} + +TEST(ConvFp16Test, Conv2D_Bias_Z_Relu) { + ConvOpAndTestAttributes attrs = { + "", // auto_pad + 
vector<int64_t>{1, 1},        // dilations
+      1,                                 // group
+      vector<int64_t>{2, 2},             // kernel_shape
+      vector<int64_t>{0, 0, 0, 0},       // pads
+      vector<int64_t>{1, 1},             // strides
+      {},                                // excluded EPs
+      "Relu"                             // activation
+  };
+
+  vector<MLFloat16> X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f),
+                         MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f),
+                         MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)};
+  vector<int64_t> X_shape = {1, 3, 3, 1};
+  vector<MLFloat16> W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f),
+                         MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)};
+  vector<int64_t> W_shape = {2, 1, 2, 2};
+  vector<int64_t> Y_shape = {1, 2, 2, 2};
+  vector<MLFloat16> B = {MLFloat16(1.0f), MLFloat16(-1.0f)};
+  vector<int64_t> B_shape = {2};
+  vector<MLFloat16> Z = {MLFloat16(-1.0f), MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(0.0f),
+                         MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(0.0f), MLFloat16(1.0f)};
+  vector<int64_t> Z_shape = {1, 2, 2, 2};
+  auto expected_vals = {MLFloat16(12.0f), MLFloat16(11.0f), MLFloat16(17.0f), MLFloat16(15.0f), MLFloat16(25.0f), MLFloat16(23.0f), MLFloat16(29.0f), MLFloat16(28.0f)};
+  TestConvFp16Op(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape);
+  TestConvFp16Op(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape, true);
+}
+
+#endif  // CONTRIB_OPS
+
+#ifndef ENABLE_TRAINING
+// Prepacking is disabled in full training build so no need to test the feature in a training build.
+
+const onnxruntime::RunOptions run_options = []() {
+  onnxruntime::RunOptions options{};
+  ORT_THROW_IF_ERROR(options.config_options.AddConfigEntry(kOpTesterRunOptionsConfigTestTunableOp, "true"));
+  return options;
+}();
+
+const constexpr auto run_with_tunable_op = &run_options;
+
+TEST(ConvFp16Test, SharedPrepackedWeights) {
+  OpTester test("Conv", 11);
+
+  vector<MLFloat16> X = {MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f),
+                         MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f),
+                         MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f)};
+  vector<int64_t> X_shape = {1, 1, 3, 3};
+  vector<MLFloat16> W = {MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f),
+                         MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f), MLFloat16(1.0f)};
+  vector<int64_t> W_shape = {2, 1, 2, 2};
+  vector<int64_t> Y_shape = {1, 2, 2, 2};
+  vector<MLFloat16> B = {MLFloat16(1.0f), MLFloat16(-1.0f)};
+  vector<int64_t> B_shape = {2};
+  auto expected_vals = {
+      MLFloat16(13.0f), MLFloat16(17.0f), MLFloat16(25.0f), MLFloat16(29.0f),
+      MLFloat16(11.0f), MLFloat16(15.0f), MLFloat16(23.0f), MLFloat16(27.0f)};
+
+  test.AddInput<MLFloat16>("X", X_shape, X);
+  test.AddInput<MLFloat16>("W", W_shape, W, true);
+  test.AddInput<MLFloat16>("B", B_shape, B, true);
+  test.AddOutput<MLFloat16>("Y", Y_shape, expected_vals, /*no sort*/ false, 0.002f, 0.0f);
+
+  OrtValue w;
+  Tensor::InitOrtValue(DataTypeImpl::GetType<MLFloat16>(), TensorShape(W_shape),
+                       W.data(), OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator), w);
+
+  SessionOptions so;
+  // Set up W as a shared initializer to be shared between sessions
+  ASSERT_EQ(so.AddInitializer("W", &w), Status::OK());
+
+  // We want all sessions running using this OpTester to be able to share pre-packed weights if applicable
+  test.EnableSharingOfPrePackedWeightsAcrossSessions();
+
+  // Pre-packing is limited just to the CPU EP for now and we will only test the CPU EP
+  // and we want to ensure that it is available in this build
+  auto cpu_ep = []() -> std::vector<std::unique_ptr<IExecutionProvider>> {
+    std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+    execution_providers.push_back(DefaultCpuExecutionProvider());
+    return execution_providers;
+  };
+
+  size_t number_of_pre_packed_weights_counter_session_1 = 0;
+  size_t number_of_shared_pre_packed_weights_counter = 0;
+
// Session 1
+  {
+    test.Config(so)
+        .Config(run_with_tunable_op)
+        .ConfigEps(cpu_ep())
+        .RunWithConfig(&number_of_pre_packed_weights_counter_session_1, &number_of_shared_pre_packed_weights_counter);
+    // Assert that no pre-packed weights have been shared thus far
+    ASSERT_EQ(number_of_shared_pre_packed_weights_counter, static_cast<size_t>(0));
+  }
+
+  auto number_of_elements_in_shared_prepacked_buffers_container =
+      test.GetNumPrePackedWeightsShared();
+  // Assert that the number of elements in the shared container
+  // is the same as the number of weights that have been pre-packed
+  ASSERT_EQ(number_of_pre_packed_weights_counter_session_1, number_of_elements_in_shared_prepacked_buffers_container);
+
+  // On some platforms/architectures MLAS may choose not to do any pre-packing. In that case the
+  // pre-packed weight count is zero and there is nothing to share, so we skip the rest of the test.
+  if (number_of_pre_packed_weights_counter_session_1 == 0)
+    return;
+
+  // Session 2
+  {
+    size_t number_of_pre_packed_weights_counter_session_2 = 0;
+    test.Config(so)
+        .Config(run_with_tunable_op)
+        .ConfigEps(cpu_ep())
+        .RunWithConfig(&number_of_pre_packed_weights_counter_session_2, &number_of_shared_pre_packed_weights_counter);
+
+    // Assert that the same number of weights were pre-packed in both sessions
+    ASSERT_EQ(number_of_pre_packed_weights_counter_session_1, number_of_pre_packed_weights_counter_session_2);
+
+    // Assert that the number of pre-packed weights that were shared equals
+    // the number of pre-packed weights in the second session
+    ASSERT_EQ(number_of_pre_packed_weights_counter_session_2,
+              static_cast<size_t>(number_of_shared_pre_packed_weights_counter));
+  }
+}
+
+#endif
+
 }  // namespace test
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
+
+#endif  // MLAS_F16VEC_INTRINSICS_SUPPORTED
\ No newline at end of file

From e0e830422ef87527ccd02b7478d874dcf44d74f6 Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Mon, 23 Dec 2024 16:25:56 +0800
Subject: [PATCH 14/17] typo

---
 onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
index f736dab0d69f6..56726038a163c 100644
--- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
+++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc
@@ -93,7 +93,7 @@ TEST(XnnpackEP, TestNhwcConvReluClipFusion) {
 
 #ifdef XNNPACK_FP16_SUPPORTED
 // This test can be removed once MLAS implements FP16 Clip fusion.
-// Now TestNhwcConvReluClipFusion_FP16 skipped output verification
+// Now TestNhwcConvReluClipFusion_FP16 skips output verification
 TEST(XnnpackEP, TestNhwcConvReluFusion_FP16) {
   const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_conv_relu_model_fp16.onnx";

From f1d3b16a3bd365078e01201190c5c992161800fd Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Wed, 25 Dec 2024 13:54:02 +0800
Subject: [PATCH 15/17] update

---
 onnxruntime/core/providers/xnnpack/detail/utils.cc | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc
index a338bb1689ea3..27c97c06e7234 100644
--- a/onnxruntime/core/providers/xnnpack/detail/utils.cc
+++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc
@@ -278,12 +278,7 @@ std::unique_ptr<IndexedSubGraph::MetaDef> FuseActivation(const NodeUnit& node_un
         value_to_set = utils::HasRawData(value)
                            ? *reinterpret_cast<const float*>(value.raw_data().data())
                            : value.float_data()[0];
-      } else {
-        // double isn't currently supported.
-        // And input and output of Clip must be float number.
-        // https://onnx.ai/onnx/operators/onnx__Clip.html
-        ORT_NOT_IMPLEMENTED("Clip min/max must be FP16 or FP32");
-      }
+      }
     }
   }
 };

From 042e5cd713a31141838b9467ad1898101ef2f657 Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Thu, 26 Dec 2024 13:51:27 +0800
Subject: [PATCH 16/17] lint

---
 onnxruntime/core/providers/xnnpack/detail/utils.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc
index 27c97c06e7234..75ce5cedf5253 100644
--- a/onnxruntime/core/providers/xnnpack/detail/utils.cc
+++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc
@@ -278,7 +278,7 @@ std::unique_ptr<IndexedSubGraph::MetaDef> FuseActivation(const NodeUnit& node_un
         value_to_set = utils::HasRawData(value)
                            ? *reinterpret_cast<const float*>(value.raw_data().data())
                            : value.float_data()[0];
-  }
+      }
     }
   }
 };

From d7f9e6c5be6ac33dc1f6ddd6a3331eabde73bb51 Mon Sep 17 00:00:00 2001
From: Yi Zhang
Date: Tue, 7 Jan 2025 15:56:26 +0800
Subject: [PATCH 17/17] update

---
 onnxruntime/core/providers/xnnpack/detail/utils.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/onnxruntime/core/providers/xnnpack/detail/utils.cc b/onnxruntime/core/providers/xnnpack/detail/utils.cc
index 75ce5cedf5253..7b21916948b29 100644
--- a/onnxruntime/core/providers/xnnpack/detail/utils.cc
+++ b/onnxruntime/core/providers/xnnpack/detail/utils.cc
@@ -278,6 +278,8 @@ std::unique_ptr<IndexedSubGraph::MetaDef> FuseActivation(const NodeUnit& node_un
         value_to_set = utils::HasRawData(value)
                            ? *reinterpret_cast<const float*>(value.raw_data().data())
                            : value.float_data()[0];
+      } else {
+        ORT_THROW("Clip min/max must be FP32 or FP16 to fuse activation in the XNNPACK EP; got type: ", arg_type);
       }
     }
   }
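
For reference, the decoding that patches 15 through 17 converge on can be sketched outside the ORT tree. The following is a minimal stand-alone sketch, not the EP's actual code path: HalfToFloat is a hand-rolled stand-in for MLFloat16::ToFloat, and DecodeClipScalar mimics how the fused-activation code reads a scalar Clip min/max out of a TensorProto's little-endian raw_data, branching on whether the element type is FLOAT16 or FLOAT.

// Stand-alone sketch of the FP16/FP32 Clip scalar decoding added by this PR.
// HalfToFloat is a hand-rolled stand-in for onnxruntime::MLFloat16::ToFloat.
#include <cstdint>
#include <cstring>
#include <iostream>

// Convert one IEEE 754 binary16 value to float.
float HalfToFloat(uint16_t h) {
  const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
  uint32_t exponent = (h >> 10) & 0x1Fu;
  uint32_t mantissa = h & 0x3FFu;
  uint32_t bits;
  if (exponent == 0) {
    if (mantissa == 0) {
      bits = sign;  // +/- zero
    } else {
      // Subnormal half: renormalize into the float exponent range.
      exponent = 127 - 15 + 1;
      while ((mantissa & 0x400u) == 0) {
        mantissa <<= 1;
        --exponent;
      }
      mantissa &= 0x3FFu;
      bits = sign | (exponent << 23) | (mantissa << 13);
    }
  } else if (exponent == 0x1Fu) {
    bits = sign | 0x7F800000u | (mantissa << 13);  // Inf / NaN
  } else {
    bits = sign | ((exponent - 15 + 127) << 23) | (mantissa << 13);
  }
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

// Decode a scalar Clip min/max stored in TensorProto raw_data,
// honoring the element type the way the patched FuseActivation does.
float DecodeClipScalar(const void* raw, bool is_fp16) {
  if (is_fp16) {
    uint16_t h;
    std::memcpy(&h, raw, sizeof(h));  // 2 raw bytes: one binary16 scalar
    return HalfToFloat(h);
  }
  float f;
  std::memcpy(&f, raw, sizeof(f));  // 4 raw bytes: one binary32 scalar
  return f;
}

int main() {
  const uint16_t six_fp16 = 0x4600;  // 6.0 in binary16
  const float six_fp32 = 6.0f;
  std::cout << DecodeClipScalar(&six_fp16, /*is_fp16=*/true) << "\n";   // prints 6
  std::cout << DecodeClipScalar(&six_fp32, /*is_fp16=*/false) << "\n";  // prints 6
  return 0;
}

The pre-fix behavior corresponds to always taking the FP32 branch, i.e. reinterpreting the two FP16 raw bytes as a 4-byte float, which is how the fused Clip bounds came out wrong for FP16 models.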
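
The counter relationship that ConvFp16Test.SharedPrepackedWeights asserts can likewise be modeled with a toy cache. SharedPackedCache and PrepackedSession below are illustrative names, not ORT APIs, and the reversed copy stands in for a real MLAS packed layout; the point is only the arithmetic the two RunWithConfig calls check: session 1 populates the shared container and shares nothing, while session 2 finds every buffer already packed, so its shared count equals its pre-packed count.

// Toy model of cross-session pre-packed weight sharing (illustrative only;
// SharedPackedCache/PrepackedSession are not ORT types).
#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

using PackedBuffer = std::vector<float>;

struct SharedPackedCache {
  // Keyed by the original weight's data pointer, standing in for ORT's
  // notion of a shared initializer's identity.
  std::map<const void*, PackedBuffer> buffers;
};

struct PrepackedSession {
  size_t prepacked_count = 0;  // weights this session saw as pre-packed
  size_t shared_hits = 0;      // of those, how many came from the shared cache

  void PrepackWeight(const std::vector<float>& w, SharedPackedCache& cache) {
    ++prepacked_count;
    if (cache.buffers.count(w.data()) != 0) {
      ++shared_hits;  // already packed by an earlier session: reuse it
      return;
    }
    PackedBuffer packed(w.rbegin(), w.rend());  // placeholder "packing"
    cache.buffers.emplace(w.data(), std::move(packed));
  }
};

int main() {
  SharedPackedCache cache;
  const std::vector<float> W(8, 1.0f);  // mirrors the all-ones W in the test

  PrepackedSession session1;
  session1.PrepackWeight(W, cache);
  assert(session1.shared_hits == 0);  // nothing shared in session 1
  assert(cache.buffers.size() == session1.prepacked_count);

  PrepackedSession session2;
  session2.PrepackWeight(W, cache);
  assert(session2.prepacked_count == session1.prepacked_count);
  assert(session2.shared_hits == session2.prepacked_count);  // all reused
  return 0;
}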