diff --git a/README.md b/README.md index f556cad894..daf715f029 100644 --- a/README.md +++ b/README.md @@ -517,5 +517,6 @@ You may also want to read and cite the amazing work that has extended MTEB & int - Orion Weller, Benjamin Chang, Sean MacAvaney, Kyle Lo, Arman Cohan, Benjamin Van Durme, Dawn Lawrie, Luca Soldaini. "[FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions](https://arxiv.org/abs/2403.15246)" arXiv 2024 - Dawei Zhu, Liang Wang, Nan Yang, Yifan Song, Wenhao Wu, Furu Wei, Sujian Li. "[LongEmbed: Extending Embedding Models for Long Context Retrieval](https://arxiv.org/abs/2404.12096)" arXiv 2024 - Kenneth Enevoldsen, Márton Kardos, Niklas Muennighoff, Kristoffer Laigaard Nielbo. "[The Scandinavian Embedding Benchmarks: Comprehensive Assessment of Multilingual and Monolingual Text Embedding](https://arxiv.org/abs/2406.02396)" arXiv 2024 +- Ali Shiraee Kasmaee, Mohammad Khodadad, Mohammad Arshi Saloot, Nick Sherck, Stephen Dokas, Hamidreza Mahyar, Soheila Samiee. "[ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance & Efficiency on a Specific Domain](https://arxiv.org/abs/2412.00532)" arXiv 2024 For works that have used MTEB for benchmarking, you can find them on the [leaderboard](https://huggingface.co/spaces/mteb/leaderboard). diff --git a/docs/benchmarks.md b/docs/benchmarks.md index a5abe50215..7c0f07d878 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -7,16 +7,27 @@ The following table gives you an overview of the benchmarks in MTEB. | Name | # Tasks | Task Types | Domains | Languages | |------|---------|------------|---------|-----------| -| [CoIR](https://github.com/CoIR-team/coir) | 10 | {'Retrieval': 10} | [Written, Programming] | python,c++,sql,go,eng,php,javascript,ruby,java | -| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | 7 | {'BitextMining': 7} | [Written, Social, Reviews] | sun,kaz,tzl,ido,abs,arq,yue,tam,nij,glg,slk,hsb,ber,xho,cbk,pol,uzb,ina,kab,swh,amh,fao,kzj,lfn,uig,sqi,deu,ang,ind,bug,pms,ibo,cym,eus,spa,ceb,tgl,ron,isl,ita,csb,cha,fin,est,pes,jpn,tel,tha,oci,cmn,min,fry,bbc,epo,lit,rus,bos,hrv,war,ara,bjn,mkd,srp,ast,nno,urd,pam,aze,eng,ace,bew,kor,dan,awa,mui,hye,ban,cor,ben,gle,swe,mad,bul,lat,cat,nob,fra,pcm,ell,mar,vie,tat,ukr,gsw,kat,arz,dsb,lvs,nld,tur,bel,max,nds,afr,khm,dtp,yor,ces,gla,zsm,mak,ile,nov,orv,bre,swg,rej,mhr,mon,mal,jav,heb,slv,bhp,kur,wuu,tuk,por,hun,hin,hau,yid | +| [BRIGHT](https://brightbenchmark.github.io/) | 1 | {'Retrieval': 1} | [Non-fiction] | eng | +| [ChemTEB](https://arxiv.org/abs/2412.00532) | 27 | {'BitextMining': 1, 'Classification': 17, 'Clustering': 2, 'PairClassification': 5, 'Retrieval': 2} | [Chemistry] | nld,tur,eng,ces,kor,zho,spa,hin,jpn,deu,fra,msa,por | +| [CoIR](https://github.com/CoIR-team/coir) | 10 | {'Retrieval': 10} | [Written, Programming] | javascript,ruby,sql,go,eng,java,php,python,c++ | +| [LongEmbed](https://arxiv.org/abs/2404.12096v2) | 6 | {'Retrieval': 6} | [Fiction, Academic, Written, Blog, Non-fiction, Spoken, Encyclopaedic] | eng | +| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | 7 | {'BitextMining': 7} | [Reviews, Written, Social] | sqi,ban,srp,jpn,nds,lat,por,mon,kur,bul,slv,mak,deu,uzb,yor,kzj,max,kat,cha,yid,zsm,spa,pms,mhr,min,fao,heb,nij,mui,tuk,rus,bew,swe,pes,slk,ceb,bjn,ido,abs,ukr,ina,kab,tgl,cor,dan,kaz,fry,rej,hrv,ces,lfn,glg,dsb,hau,ace,urd,ben,yue,nld,eng,epo,ron,xho,wuu,cmn,ind,ang,hsb,mad,pam,nov,swh,bbc,pcm,ara,hye,mkd,nno,ast,jav,lvs,mal,swg,nob,tat,arz,vie,ile,tam,est,ber,bre,csb,pol,afr,cbk,bug,tzl,kor,ibo,hun,war,aze,tha,mar,uig,gla,orv,hin,amh,bel,sun,fin,cat,awa,gsw,isl,oci,ell,cym,arq,ita,fra,bos,dtp,eus,bhp,tel,tur,khm,lit,gle | +| MTEB(Europe, beta) | 74 | {'BitextMining': 7, 'Classification': 21, 'Clustering': 8, 'Retrieval': 15, 'InstructionRetrieval': 3, 'MultilabelClassification': 2, 'PairClassification': 6, 'Reranking': 3, 'STS': 9} | [Web, Fiction, Social, Academic, Religious, Written, Medical, Blog, Constructed, Non-fiction, Legal, News, Government, Reviews, Spoken, Encyclopaedic, Programming, Subtitles] | qvm,esk,nlg,toj,gup,llg,jpn,azj,for,lav,kmh,por,bsj,tna,upv,cta,smk,zty,qvz,ntj,ton,uvh,cjk,kgf,gaw,bak,seh,jiv,hui,ksr,uli,kwi,qvw,kkl,arl,msk,omw,aai,tet,yby,mva,fao,kgk,min,kac,dji,box,rus,chz,emp,ktm,bps,bon,nus,bss,cut,sue,meq,kpr,rwo,ceb,zaj,mib,aui,apc,kdl,mxb,okv,rai,big,reg,ulk,mlg,yap,tpt,hrv,nak,plu,nde,kyc,arp,hau,ary,alp,apr,caa,mbh,uvl,zat,bjp,urd,bki,lin,mek,hlt,iws,spl,xav,yml,lcm,ese,xho,are,mux,lww,ndg,ntu,tzj,ame,yss,zar,fil,aii,csy,gvs,zpm,amh,spp,ken,avt,ltz,swh,viv,kmk,zul,bqp,cav,wln,leu,tcs,tuf,mkd,clu,msy,too,ast,amx,quf,jav,yre,nhe,tat,lbk,maj,msm,rug,nor,tbc,prf,pad,zlm,kze,wnc,fai,cbs,mai,aoi,mxq,bao,kos,mlh,nep,mkl,roo,umb,poh,bod,nna,aey,afr,aly,cac,maa,aze,fon,tha,mhl,chd,tpi,tzm,acq,kyz,nbq,yle,ape,bco,att,nin,mkj,yuj,ata,djr,atb,enq,cpb,sxb,rmc,zas,guj,kbq,gfk,tgo,acm,cux,fin,npi,etr,tsn,dob,mpt,alq,byx,cak,cso,spy,oci,asm,ttc,nwi,srn,hmn,gyr,hto,ngu,cpa,tif,fuv,kue,yuw,ote,mgw,ssg,bos,mvn,dop,aso,mox,ndj,stp,mpp,nas,kon,mks,caf,mbs,mcd,wap,cco,tod,aon,aom,cnl,srp,zga,lat,sja,kpj,nhi,nko,swp,bho,blw,mih,mon,sna,bgs,als,kyf,kur,bul,uzb,knj,mam,yor,zos,gdr,aka,bam,bmh,gnw,lid,cha,msc,zpl,gun,qxn,zsm,spa,mgh,nca,cpc,quc,hvn,bvr,agu,ngp,aak,jni,mau,sab,wos,huv,swe,kea,tum,pes,som,pbt,mmo,amo,kgp,taq,sbe,mil,nhg,bmu,bvd,wrs,atg,muy,tpa,ign,vmy,uri,chf,cek,knf,pib,soy,boa,ces,xed,pma,hix,kbc,orm,sim,ace,nhw,kud,ppo,xnn,yut,snx,ilo,zaa,nld,bsp,aau,myk,grn,bkq,cme,bbb,ssd,fur,knc,knv,heg,urw,ayr,ons,sat,crx,rop,szl,suz,ncl,anh,kto,tca,chk,xla,qxh,ziw,ntp,azb,ara,tew,sot,cjv,djk,usa,ltg,cap,arz,lmo,vec,jao,wer,dhg,vie,ded,hop,khk,faa,tam,sus,mwc,ikk,kek,mie,trc,tue,ura,crh,bkd,bzj,kwj,klt,sps,jid,xsi,swa,qxo,lim,nqo,hns,tmd,mbt,mbc,ibo,hun,wrk,bnp,abt,kaq,car,kiz,nvm,nfa,gul,guo,uzn,beo,aer,nhy,otm,cjo,tgk,bel,eri,mca,wsk,rro,row,bsn,tpz,fij,tvk,msb,mpx,abx,poy,sgb,kas,tcz,top,dif,awk,cbc,bea,ell,myy,pus,bmr,ssx,pao,ebk,ajp,opm,wnu,gub,acr,tbf,ubr,cth,taj,aby,kde,mqj,zao,khm,hat,gle,azg,cbv,ian,apu,ptp,kbm,met,plt,sag,agd,pag,ydd,ckb,mzz,div,kmg,miz,tac,tuo,gvn,boj,tee,mph,mna,qwh,gng,agg,mle,rgu,haw,med,kyg,mig,nhu,tnc,waj,kat,lua,zpz,kpx,tof,ven,dzo,yaa,bqc,klv,qul,kqw,bef,gai,heb,nuy,zac,mcr,zpc,ssw,meu,tuk,gui,kmo,usp,otq,khs,ksj,xbi,nya,cya,aoj,kmr,grc,sny,snp,mir,piu,geb,tgl,dik,agn,dan,qvn,kaz,kbp,mto,tiy,xon,zav,dww,zap,kqa,lac,kne,wat,cbt,naf,inb,kwf,crn,azz,wim,ben,wro,poi,yue,awb,cgc,eng,mjc,amf,mps,mwe,ncu,cle,tdt,hne,zai,gdn,toc,bhl,kir,ron,fue,kyq,ixl,ghs,ncj,tbz,nnq,mio,kwd,mxp,beu,sbk,fuh,gym,ztq,mey,ikw,pab,kmb,cof,tso,ipi,byr,aia,wiv,agm,npl,ter,hye,iou,tku,nno,cnt,kqc,sll,lvs,gnn,nob,dah,nii,san,wuv,udu,gux,ots,zpq,cuk,mbj,nab,bjz,hbo,imo,mcf,glk,zam,twi,srd,sin,zca,qvc,agr,con,kjs,zaw,mav,gum,dov,ood,soq,tte,msa,chq,cbk,isn,kpf,ptu,mri,cao,aeb,cni,aaz,yon,pan,sgz,rom,mop,gwi,nou,uig,gla,far,atd,hin,tnp,bbr,kpg,huu,arn,jvn,cat,awa,amm,urb,run,mit,pir,gam,adz,tir,isl,pls,mlt,qve,nyu,txu,tbg,dwy,quy,ruf,kiw,shp,amr,ita,maq,dgr,fra,kin,ubu,gof,gaz,mgc,cmo,ctu,tel,eus,mcq,bpr,ino,snd,bgt,mwf,acu,jic,kkc,jac,lit,xtd,dyu,kvn,zyp,prs,cop,auc,wed,apb,sqi,ban,wal,poe,tnk,myu,otn,kje,ong,bkx,zsr,hch,agt,wiu,spm,zpu,scn,sri,myw,buk,kdc,zho,sbs,slv,deu,kqf,kvg,tgp,bhg,dwr,xtm,amu,wbp,tim,ory,tos,kan,kbh,mya,mwp,mcb,shn,bdd,cub,yrb,tbo,yal,lug,tah,txq,emi,hub,nso,slk,zpo,zpv,bmk,nss,bjn,nch,bzd,shj,ukr,mbl,tlf,kab,kew,kpw,luo,cpy,kmu,kup,zab,pri,snc,wbi,acf,gmv,glg,amp,qup,nop,srq,yka,apw,mqb,wmt,bch,ewe,sey,lbb,epo,qvh,taw,fuc,kql,ksd,smo,gvf,cmn,yad,ind,qvs,obo,wmw,nsn,anv,mic,pap,ake,fas,cbr,bjr,glv,mdy,tsw,gvc,noa,bus,bjv,cwe,pon,pio,snn,mal,nho,bba,jae,mxt,wol,nif,ycn,lao,tfr,ffm,qub,hus,bzh,mlp,mti,not,nys,tzo,arb,mos,kam,cuc,dgc,pah,pjt,est,bxh,hot,bre,kms,cot,awx,bjk,pwg,cpu,hla,mpm,fuf,pol,tnn,shi,auy,mpj,tuc,bug,kor,zad,war,ars,rkb,mni,cbu,lif,mar,dad,mee,dgz,mco,kik,apz,mkn,sco,mbb,maz,lij,khz,hmo,guh,sun,cbi,lgl,nhr,tiw,daa,amn,amk,tke,lex,mag,cym,eko,zia,mcp,gah,urt,sua,cab,quh,srm,vid,blz,mmx,apn,tur,rmy,bem,yaq,ctp,cui,lus,tav,cax,yva | +| MTEB(Indic, beta) | 23 | {'BitextMining': 4, 'Clustering': 1, 'Classification': 13, 'STS': 1, 'PairClassification': 1, 'Retrieval': 2, 'Reranking': 1} | [Web, Fiction, Social, Encyclopaedic, Religious, Written, Constructed, Non-fiction, Legal, News, Spoken, Reviews, Government] | ban,pag,ckb,ydd,srp,azj,jpn,bho,por,sna,als,scn,cjk,zho,mwr,bul,slv,deu,yor,bak,ory,aka,bam,kat,lua,kan,dzo,mya,zsm,spa,shn,min,nus,fao,heb,kac,lug,tuk,kea,rus,ssw,tum,swe,nso,pes,slk,som,mup,pbt,nya,ceb,bjn,kmr,apc,taq,ukr,kab,luo,tgl,dik,dan,kaz,kbp,hrv,ces,glg,ary,hau,ace,urd,ben,boy,ewe,ilo,yue,lin,nld,eng,hne,epo,kir,grn,ron,xho,smo,fur,knc,cmn,ind,ayr,sat,szl,pap,fas,kmb,tso,ltz,swh,brx,zul,azb,doi,ara,hye,mkd,nno,ast,jav,lvs,mal,lao,sot,wol,nob,ltg,tat,san,arz,lmo,vec,nor,vie,sag,khk,arb,mos,kam,tam,bgc,mai,gbm,srd,est,twi,crh,sin,nep,swa,umb,bod,pol,lim,nqo,afr,bug,kor,ibo,mri,hun,aeb,war,ars,mni,fon,tha,mar,tpi,tzm,acq,pan,uzn,kik,gla,uig,hin,lij,tgk,amh,bel,sun,acm,guj,fin,cat,awa,fij,npi,run,tsn,kas,tir,isl,asm,mlt,ell,oci,mag,cym,pus,gom,quy,ajp,raj,fuv,ita,kin,bos,fra,gaz,eus,tel,tur,snd,kon,khm,bem,dyu,gle,hat,lit,prs,lus,plt | +| MTEB(Medical) | 12 | {'Retrieval': 9, 'Clustering': 2, 'Reranking': 1} | [Web, Academic, Medical, Written, Non-fiction, Government] | rus,eng,kor,ara,spa,zho,vie,fra,pol,cmn | +| MTEB(Multilingual, beta) | 132 | {'BitextMining': 13, 'Classification': 43, 'Clustering': 17, 'Retrieval': 18, 'InstructionRetrieval': 3, 'MultilabelClassification': 5, 'PairClassification': 11, 'Reranking': 6, 'STS': 16} | [Web, Fiction, Social, Academic, Religious, Written, Medical, Blog, Constructed, Non-fiction, Legal, Government, News, Reviews, Spoken, Encyclopaedic, Programming, Subtitles] | qvm,esk,nlg,toj,gup,llg,jpn,azj,for,lav,kmh,por,bsj,tna,upv,cta,smk,zty,qvz,ntj,ton,uvh,cjk,kgf,gaw,bak,seh,jiv,hui,ksr,uli,kwi,qvw,kkl,arl,msk,omw,aai,tet,yby,mva,fao,kgk,min,kac,dji,mui,box,rus,chz,emp,bew,ktm,bps,bon,nus,bss,cut,sue,meq,kpr,rwo,ceb,zaj,mib,aui,apc,kdl,mxb,okv,rai,big,reg,ulk,mlg,yap,tpt,rej,hrv,nak,plu,nde,lfn,kyc,arp,hau,ary,alp,apr,caa,mbh,uvl,zat,bjp,urd,bki,lin,mek,hlt,iws,spl,xav,yml,lcm,ese,xho,are,mux,lww,ndg,ntu,tzj,ame,yss,zar,fil,aii,csy,gvs,zpm,amh,spp,ken,avt,ltz,swh,viv,kmk,zul,bqp,cav,wln,leu,tcs,tuf,mkd,clu,msy,too,ast,amx,quf,jav,yre,nhe,tat,lbk,maj,msm,rug,nor,tbc,prf,pad,zlm,kze,wnc,fai,cbs,mai,aoi,mxq,bao,kos,mlh,nep,mkl,roo,umb,poh,bod,nna,aey,afr,aly,cac,maa,aze,fon,tha,mhl,chd,tpi,tzm,acq,kyz,nbq,yle,ape,bco,att,nin,mkj,yuj,ata,djr,atb,enq,cpb,sxb,rmc,zas,guj,kbq,gfk,tgo,acm,cux,fin,npi,etr,tsn,dob,mpt,alq,byx,cak,cso,spy,oci,asm,ttc,nwi,srn,hmn,gyr,hto,arq,ngu,cpa,tif,fuv,raj,kue,yuw,ote,mgw,ssg,bos,mvn,dop,aso,mox,ndj,stp,mpp,nas,kon,mks,caf,mbs,mcd,wap,cco,tod,aon,aom,cnl,srp,zga,lat,sja,kpj,nhi,nko,swp,bho,blw,mih,mon,sna,bgs,als,kyf,kur,bul,uzb,knj,mam,yor,zos,gdr,aka,bam,bmh,gnw,lid,cha,msc,zpl,gun,qxn,zsm,spa,mgh,nca,yid,pms,mhr,cpc,quc,hvn,bvr,agu,svk,ngp,aak,jni,mau,sab,wos,huv,swe,kea,tum,pes,som,mup,pbt,mmo,amo,kgp,ido,taq,sbe,mil,nhg,bmu,bvd,wrs,atg,muy,tpa,chv,ign,vmy,cor,uri,fry,chf,cek,knf,pib,soy,boa,ces,xed,pma,hix,kbc,orm,sim,ace,nhw,kud,ppo,xnn,yut,boy,snx,ilo,zaa,nld,bsp,aau,myk,grn,bkq,cme,bbb,ssd,fur,knc,wuu,knv,heg,urw,ayr,ons,sat,crx,ang,hsb,rop,szl,suz,mad,ncl,anh,kto,tca,chk,xla,qxh,brx,ziw,ntp,azb,ara,tew,sot,cjv,djk,usa,ltg,cap,arz,lmo,vec,jao,wer,dhg,vie,ded,hop,khk,faa,tam,bgc,sus,mwc,ikk,kek,mie,trc,tue,ura,crh,ber,bkd,bzj,kwj,klt,sps,jid,xsi,swa,qxo,csb,lim,nqo,hns,tmd,mbt,mbc,ibo,hun,wrk,bnp,abt,kaq,car,kiz,nvm,nfa,gul,guo,uzn,beo,aer,nhy,otm,orv,cjo,tgk,bel,eri,mca,wsk,rro,row,bsn,tpz,fij,tvk,msb,mpx,abx,poy,sgb,kas,tcz,top,dif,awk,cbc,bea,ell,myy,pus,bmr,ssx,pao,ebk,ajp,opm,wnu,gub,acr,max,tbf,ubr,cth,taj,aby,kde,mqj,zao,tyv,khm,hat,gle,azg,cbv,ian,apu,ptp,kbm,met,plt,sag,agd,sah,pag,ydd,ckb,mzz,div,kmg,miz,tac,tuo,gvn,boj,tee,mph,mna,qwh,gng,agg,mle,mak,rgu,haw,med,kyg,mig,nhu,tnc,waj,kat,lua,zpz,kpx,tof,ven,dzo,yaa,bqc,klv,qul,kqw,bef,gai,heb,nuy,zac,mcr,zpc,ssw,meu,tuk,gui,kmo,usp,otq,khs,ksj,xbi,nya,cya,aoj,kmr,grc,sny,snp,mir,piu,geb,tgl,dik,agn,dan,qvn,kaz,kbp,mto,tiy,xon,zav,dww,zap,kqa,lac,kne,wat,cbt,naf,inb,kwf,crn,azz,wim,ben,wro,poi,yue,awb,cgc,eng,mjc,amf,mps,mwe,ncu,cle,tdt,hne,zai,gdn,toc,bhl,kir,ron,fue,kyq,ixl,ghs,ncj,tbz,nnq,mio,kwd,mxp,beu,sbk,fuh,gym,ztq,mey,ikw,pab,pam,kmb,cof,tso,ipi,byr,aia,wiv,pcm,agm,doi,npl,ter,hye,iou,tku,nno,cnt,kqc,sll,lvs,gnn,nob,dah,nii,san,wuv,udu,gux,ots,zpq,cuk,mbj,nab,bjz,hbo,imo,mcf,glk,zam,twi,srd,sin,zca,qvc,agr,con,kjs,zaw,mav,gum,dov,ood,soq,tte,msa,chq,cbk,tzl,isn,kpf,ptu,mri,cao,aeb,cni,aaz,yon,pan,sgz,rom,mop,gwi,nou,uig,gla,far,atd,hin,tnp,bbr,kpg,huu,arn,jvn,cat,awa,amm,urb,run,mit,pir,gam,adz,tir,isl,pls,mlt,gsw,qve,nyu,txu,tbg,dwy,quy,ruf,kiw,shp,amr,ita,maq,dgr,fra,kin,ubu,gof,gaz,mgc,cmo,ctu,tel,eus,mcq,bpr,ino,snd,bgt,mwf,acu,jic,kkc,jac,lit,xtd,dyu,kvn,zyp,prs,cop,auc,wed,apb,sqi,ban,wal,poe,tnk,myu,otn,kje,ong,bkx,zsr,nds,hch,agt,wiu,spm,zpu,scn,sri,myw,buk,kdc,zho,sbs,slv,mwr,deu,kqf,kvg,tgp,bhg,dwr,xtm,amu,wbp,tim,ory,kzj,tos,kan,kbh,mya,mwp,mcb,shn,bdd,cub,yrb,tbo,yal,nij,lug,tah,txq,emi,hub,nso,slk,zpo,zpv,bmk,nss,bjn,nch,abs,bzd,shj,ukr,mbl,ina,tlf,kab,kew,kpw,luo,cpy,kmu,kup,zab,pri,snc,wbi,acf,gmv,glg,dsb,amp,qup,nop,srq,yka,apw,mqb,wmt,bch,ewe,sey,lbb,epo,qvh,taw,fuc,kql,ksd,smo,gvf,cmn,yad,ind,qvs,obo,wmw,nsn,anv,mic,pap,ake,nov,fas,cbr,bjr,glv,mdy,bbc,tsw,gvc,noa,bus,bjv,cwe,pon,pio,snn,swg,mal,nho,bba,jae,mxt,wol,nif,ycn,lao,tfr,ffm,qub,hus,bzh,mlp,mti,not,nys,ile,tzo,arb,mos,kam,cuc,dgc,pah,pjt,gbm,est,bxh,hot,bre,kms,cot,awx,bjk,pwg,cpu,hla,mpm,fuf,pol,tnn,shi,auy,mpj,tuc,bug,kor,zad,war,ars,rkb,mni,cbu,lif,mar,krc,dad,mee,dgz,mco,kik,apz,mkn,sco,mbb,maz,lij,khz,hmo,guh,sun,cbi,lgl,nhr,tiw,daa,amn,amk,tke,lex,mag,cym,gom,eko,zia,mcp,gah,urt,sua,cab,quh,srm,dtp,vid,blz,bhp,mmx,apn,tur,rmy,bem,yaq,ctp,cui,lus,tav,cax,yva | | [MTEB(Retrieval w/Instructions)](https://arxiv.org/abs/2403.15246) | 3 | {'InstructionRetrieval': 3} | [Written, News] | eng | -| [MTEB(Scandinavian)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | 28 | {'BitextMining': 2, 'Classification': 13, 'Retrieval': 7, 'Clustering': 6} | [Encyclopaedic, Spoken, Non-fiction, Government, News, Fiction, Social, Blog, Reviews, Written, Web, Legal] | nob,fao,swe,isl,dan,nno | -| MTEB(code) | 12 | {'Retrieval': 12} | [Written, Programming] | python,c++,sql,c,go,eng,shell,typescript,php,scala,rust,swift,javascript,ruby,java | -| [MTEB(deu)](https://arxiv.org/html/2401.02709v1) | 19 | {'Classification': 6, 'Clustering': 4, 'PairClassification': 2, 'Reranking': 1, 'Retrieval': 4, 'STS': 2} | [Encyclopaedic, Spoken, News, Reviews, Written, Web] | eng,deu,pol,fra | -| MTEB(eng) | 67 | {'Classification': 12, 'Retrieval': 26, 'Clustering': 11, 'Reranking': 4, 'STS': 10, 'PairClassification': 3, 'Summarization': 1} | [Encyclopaedic, Spoken, Non-fiction, Blog, News, Medical, Social, Programming, Written, Reviews, Web, Academic] | tur,fra,eng,cmn,pol,ita,nld,spa,deu,ara | -| [MTEB(fra)](https://arxiv.org/abs/2405.20468) | 26 | {'Classification': 6, 'Clustering': 7, 'PairClassification': 2, 'Reranking': 2, 'Retrieval': 5, 'STS': 3, 'Summarization': 1} | [Encyclopaedic, Spoken, Non-fiction, News, Social, Reviews, Written, Web, Legal, Academic] | eng,deu,pol,fra | -| MTEB(kor) | 6 | {'Classification': 1, 'Reranking': 1, 'Retrieval': 2, 'STS': 2} | [Encyclopaedic, Spoken, News, Reviews, Written, Web] | kor | -| [MTEB(law)](https://aclanthology.org/2023.eacl-main.148/) | 8 | {'Retrieval': 8} | [Written, Legal] | eng,deu,zho | -| [MTEB(pol)](https://arxiv.org/abs/2405.10138) | 18 | {'Classification': 7, 'Clustering': 3, 'PairClassification': 4, 'STS': 4} | [Spoken, Non-fiction, News, Fiction, Social, Written, Web, Legal, Academic] | pol,deu,eng,fra | -| [MTEB(rus)](https://aclanthology.org/2023.eacl-main.148/) | 23 | {'Classification': 9, 'Clustering': 3, 'MultilabelClassification': 2, 'PairClassification': 1, 'Reranking': 2, 'Retrieval': 3, 'STS': 3} | [Encyclopaedic, Spoken, Blog, News, Social, Reviews, Written, Web, Academic] | rus | +| [MTEB(Scandinavian)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | 28 | {'BitextMining': 2, 'Classification': 13, 'Retrieval': 7, 'Clustering': 6} | [Web, Fiction, Social, Written, Blog, Non-fiction, Legal, News, Spoken, Reviews, Government, Encyclopaedic] | swe,nno,isl,dan,fao,nob | +| MTEB(code) | 12 | {'Retrieval': 12} | [Written, Programming] | javascript,ruby,sql,go,c,eng,shell,typescript,rust,java,php,python,scala,swift,c++ | +| [MTEB(deu)](https://arxiv.org/html/2401.02709v1) | 19 | {'Classification': 6, 'Clustering': 4, 'PairClassification': 2, 'Reranking': 1, 'Retrieval': 4, 'STS': 2} | [Web, Written, News, Spoken, Reviews, Encyclopaedic] | pol,deu,fra,eng | +| MTEB(eng, beta) | 41 | {'Classification': 8, 'Retrieval': 10, 'Clustering': 8, 'Reranking': 2, 'STS': 9, 'PairClassification': 3, 'Summarization': 1} | [Web, Academic, Social, Written, Medical, Blog, Non-fiction, News, Spoken, Reviews, Encyclopaedic, Programming] | nld,tur,eng,ara,spa,ita,deu,fra,pol,cmn | +| MTEB(eng, classic) | 67 | {'Classification': 12, 'Retrieval': 26, 'Clustering': 11, 'Reranking': 4, 'STS': 10, 'PairClassification': 3, 'Summarization': 1} | [Web, Academic, Social, Written, Medical, Blog, Non-fiction, News, Spoken, Reviews, Encyclopaedic, Programming] | nld,tur,eng,ara,spa,ita,deu,fra,pol,cmn | +| [MTEB(fra)](https://arxiv.org/abs/2405.20468) | 26 | {'Classification': 6, 'Clustering': 7, 'PairClassification': 2, 'Reranking': 2, 'Retrieval': 5, 'STS': 3, 'Summarization': 1} | [Web, Academic, Social, Written, Non-fiction, Legal, News, Spoken, Reviews, Encyclopaedic] | pol,deu,fra,eng | +| [MTEB(jpn)](https://github.com/sbintuitions/JMTEB) | 16 | {'Clustering': 2, 'Classification': 4, 'STS': 2, 'PairClassification': 1, 'Retrieval': 6, 'Reranking': 1} | [Web, Academic, Written, Non-fiction, News, Spoken, Reviews, Encyclopaedic] | jpn | +| MTEB(kor) | 6 | {'Classification': 1, 'Reranking': 1, 'Retrieval': 2, 'STS': 2} | [Web, Written, News, Spoken, Reviews, Encyclopaedic] | kor | +| [MTEB(law)](https://aclanthology.org/2023.eacl-main.148/) | 8 | {'Retrieval': 8} | [Written, Legal] | deu,zho,eng | +| [MTEB(pol)](https://arxiv.org/abs/2405.10138) | 18 | {'Classification': 7, 'Clustering': 3, 'PairClassification': 4, 'STS': 4} | [Web, Fiction, Academic, Social, Written, Non-fiction, Legal, News, Spoken] | pol,deu,fra,eng | +| [MTEB(rus)](https://aclanthology.org/2023.eacl-main.148/) | 23 | {'Classification': 9, 'Clustering': 3, 'MultilabelClassification': 2, 'PairClassification': 1, 'Reranking': 2, 'Retrieval': 3, 'STS': 3} | [Web, Social, Academic, Written, Blog, News, Spoken, Reviews, Encyclopaedic] | rus | +| [NanoBEIR](https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6) | 13 | {'Retrieval': 13} | [Web, Academic, Social, Medical, Written, Non-fiction, News, Encyclopaedic] | eng | +| [RAR-b](https://arxiv.org/abs/2404.06347) | 17 | {'Retrieval': 17} | [Encyclopaedic, Written, Programming] | eng | \ No newline at end of file diff --git a/docs/tasks.md b/docs/tasks.md index f4ec3c792e..15b9474168 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -122,6 +122,8 @@ The following tables give you an overview of the tasks in MTEB. | [CUREv1](https://huggingface.co/datasets/clinia/CUREv1) | ['eng', 'fra', 'spa'] | Retrieval | s2p | [Medical, Academic, Written] | None | None | | [CanadaTaxCourtOutcomesLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [CataloniaTweetClassification](https://aclanthology.org/2020.lrec-1.171/) | ['cat', 'spa'] | Classification | s2s | [Social, Government, Written] | None | None | +| [ChemHotpotQARetrieval](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Retrieval | s2p | [Chemistry] | None | None | +| [ChemNQRetrieval](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Retrieval | s2p | [Chemistry] | None | None | | [ClimateFEVER](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [ClimateFEVERHardNegatives](https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html) (Thomas Diggelmann, 2021) | ['eng'] | Retrieval | s2p | | None | None | | [CmedqaRetrieval](https://aclanthology.org/2022.emnlp-main.357.pdf) | ['cmn'] | Retrieval | s2p | [Medical, Written] | None | None | @@ -425,6 +427,12 @@ The following tables give you an overview of the tasks in MTEB. | [PolEmo2.0-IN](https://aclanthology.org/K19-1092.pdf) | ['pol'] | Classification | s2s | [Written, Social] | None | None | | [PolEmo2.0-OUT](https://aclanthology.org/K19-1092.pdf) | ['pol'] | Classification | s2s | [Written, Social] | None | None | | [PpcPC](https://arxiv.org/pdf/2207.12759.pdf) (Sławomir Dadas, 2022) | ['pol'] | PairClassification | s2s | [Fiction, Non-fiction, Web, Written, Spoken, Social, News] | None | None | +| [PubChemAISentenceParaphrasePC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemSMILESBitextMining](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | BitextMining | s2s | [Chemistry] | None | None | +| [PubChemSMILESPC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemSynonymPC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemWikiPairClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['ces', 'deu', 'eng', 'fra', 'hin', 'jpn', 'kor', 'msa', 'nld', 'por', 'spa', 'tur', 'zho'] | PairClassification | s2s | [Chemistry] | None | None | +| [PubChemWikiParagraphsPC](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | PairClassification | p2p | [Chemistry] | None | None | | [PublicHealthQA](https://huggingface.co/datasets/xhluca/publichealth-qa) | ['ara', 'eng', 'fra', 'kor', 'rus', 'spa', 'vie', 'zho'] | Retrieval | s2p | [Medical, Government, Web, Written] | None | None | | [PunjabiNewsClassification](https://github.com/goru001/nlp-for-punjabi/) (Anoop Kunchukuttan, 2020) | ['pan'] | Classification | s2s | [News, Written] | None | None | | [QBQTC](https://github.com/CLUEbenchmark/QBQTC/tree/main/dataset) | ['cmn'] | STS | s2s | | None | None | @@ -468,6 +476,8 @@ The following tables give you an overview of the tasks in MTEB. | [SCDDVerificationLegalBenchClassification](https://huggingface.co/datasets/nguha/legalbench) (Neel Guha, 2023) | ['eng'] | Classification | s2s | [Legal, Written] | None | None | | [SCIDOCS](https://allenai.org/data/scidocs) (Arman Cohan, 2020) | ['eng'] | Retrieval | s2p | [Academic, Written, Non-fiction] | None | None | | [SCIDOCS-PL](https://allenai.org/data/scidocs) (Konrad Wojtasik, 2024) | ['pol'] | Retrieval | s2p | | None | None | +| [SDSEyeProtectionClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2p | [Chemistry] | None | None | +| [SDSGlovesClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2p | [Chemistry] | None | None | | [SIB200Classification](https://arxiv.org/abs/2309.07445) (Adelani et al., 2023) | ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] | Classification | s2s | [News, Written] | None | None | | [SIB200ClusteringS2S](https://arxiv.org/abs/2309.07445) (Adelani et al., 2023) | ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] | Clustering | s2s | [News, Written] | None | None | | [SICK-BR-PC](https://linux.ime.usp.br/~thalen/SICK_PT.pdf) | ['por'] | PairClassification | s2s | [Web, Written] | None | None | @@ -595,8 +605,25 @@ The following tables give you an overview of the tasks in MTEB. | [WebLINXCandidatesReranking](https://mcgill-nlp.github.io/weblinx) (Xing Han Lù, 2024) | ['eng'] | Reranking | p2p | [Academic, Web, Written] | None | None | | [WikiCitiesClustering](https://huggingface.co/datasets/wikipedia) | ['eng'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | | [WikiClusteringP2P.v2](https://github.com/Rysias/wiki-clustering) | ['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] | Clustering | p2p | [Encyclopaedic, Written] | None | None | +| [WikipediaBioMetChemClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaBiolumNeurochemClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemEngSpecialtiesClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemFieldsClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemistryTopicsClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaChemistryTopicsClustering](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Clustering | s2p | [Chemistry] | None | None | +| [WikipediaCompChemSpectroscopyClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaCryobiologySeparationClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaCrystallographyAnalyticalClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaGreenhouseEnantiopureClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaIsotopesFissionClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaLuminescenceClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaOrganicInorganicClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | | [WikipediaRerankingMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-reranking-multilingual) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Reranking | s2p | [Encyclopaedic, Written] | {'test': 24000} | {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'min_query_length': 7, 'avg_query_length': 59.09, 'max_query_length': 180, 'unique_query': 23997, 'min_positive_length': 100, 'avg_positive_length': 385.45, 'max_positive_length': 3515, 'unique_positive': 23993, 'min_negative_length': 100, 'avg_negative_length': 381.24, 'max_negative_length': 9461, 'unique_negative': 191783, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 60.83, 'max_query_length': 166, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 375.89, 'max_positive_length': 2241, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 374.19, 'max_negative_length': 4869, 'unique_negative': 11996}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 7, 'avg_query_length': 47.27, 'max_query_length': 123, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.59, 'max_positive_length': 2338, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 393.98, 'max_negative_length': 5104, 'unique_negative': 11996}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.27, 'max_query_length': 137, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 383.84, 'max_positive_length': 2300, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 368.25, 'max_negative_length': 3487, 'unique_negative': 11982}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.75, 'max_query_length': 137, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 351.68, 'max_positive_length': 2159, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.46, 'max_negative_length': 2563, 'unique_negative': 11972}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 20, 'avg_query_length': 70.0, 'max_query_length': 180, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 391.54, 'max_positive_length': 2674, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 399.27, 'max_negative_length': 3083, 'unique_negative': 12000}, 'en': {'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 68.37, 'max_query_length': 162, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 451.73, 'max_positive_length': 3515, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 453.14, 'max_negative_length': 3662, 'unique_negative': 12000}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 12, 'avg_query_length': 48.67, 'max_query_length': 119, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 347.7, 'max_positive_length': 2571, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.84, 'max_negative_length': 4707, 'unique_negative': 11978}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.34, 'max_query_length': 132, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.71, 'max_positive_length': 2129, 'unique_positive': 1498, 'min_negative_length': 100, 'avg_negative_length': 377.84, 'max_negative_length': 2574, 'unique_negative': 11972}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 13, 'avg_query_length': 50.78, 'max_query_length': 125, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 420.38, 'max_positive_length': 2361, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 409.52, 'max_negative_length': 5912, 'unique_negative': 11996}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 23, 'avg_query_length': 70.05, 'max_query_length': 156, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 396.97, 'max_positive_length': 2082, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 393.33, 'max_negative_length': 9461, 'unique_negative': 11993}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.34, 'max_query_length': 136, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 380.79, 'max_positive_length': 1864, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 375.03, 'max_negative_length': 3641, 'unique_negative': 11985}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.12, 'max_query_length': 176, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 404.02, 'max_positive_length': 3057, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 397.55, 'max_negative_length': 2877, 'unique_negative': 11991}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 61.97, 'max_query_length': 169, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 346.71, 'max_positive_length': 1917, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 348.59, 'max_negative_length': 4213, 'unique_negative': 11971}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 15, 'avg_query_length': 55.67, 'max_query_length': 146, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 386.35, 'max_positive_length': 2421, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 384.06, 'max_negative_length': 3668, 'unique_negative': 11974}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.29, 'max_query_length': 129, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 367.72, 'max_positive_length': 1450, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 366.84, 'max_negative_length': 2841, 'unique_negative': 11996}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 57.73, 'max_query_length': 133, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 372.59, 'max_positive_length': 2493, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 368.94, 'max_negative_length': 3680, 'unique_negative': 11999}}}} | | [WikipediaRetrievalMultilingual](https://huggingface.co/datasets/ellamind/wikipedia-2023-11-retrieval-multilingual-queries) | ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] | Retrieval | s2p | [Encyclopaedic, Written] | None | None | +| [WikipediaSaltsSemiconductorsClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaSolidStateColloidalClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | +| [WikipediaSpecialtiesInChemistryClustering](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Clustering | s2p | [Chemistry] | None | None | +| [WikipediaTheoreticalAppliedClassification](https://arxiv.org/abs/2412.00532) (Kasmaee et al., 2024) | ['eng'] | Classification | s2s | [Chemistry] | None | None | | [WinoGrande](https://winogrande.allenai.org/) (Xiao et al., 2024) | ['eng'] | Retrieval | s2s | [Encyclopaedic, Written] | None | None | | [WisesightSentimentClassification](https://github.com/PyThaiNLP/wisesight-sentiment) | ['tha'] | Classification | s2s | [Social, News, Written] | None | None | | XMarket (Bonab et al., 2021) | ['deu', 'eng', 'spa'] | Retrieval | s2p | | None | None | @@ -819,7 +846,7 @@ The following tables give you an overview of the tasks in MTEB. | cco | Comaltepec Chinantec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ceb | Cebuano | Austronesian | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 6 | | cek | Eastern Khumi Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| ces | Czech | Indo-European | 4 | 5 | 2 | 0 | 1 | 1 | 1 | 2 | 0 | 0 | 0 | 16 | +| ces | Czech | Indo-European | 4 | 5 | 2 | 0 | 1 | 2 | 1 | 2 | 0 | 0 | 0 | 17 | | cgc | Kagayanen | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cha | Chamorro | Austronesian | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | chd | Highland Oaxaca Chontal | Tequistlatecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -875,7 +902,7 @@ The following tables give you an overview of the tasks in MTEB. | dah | Gwahatike | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dan | Danish | Indo-European | 5 | 9 | 2 | 0 | 1 | 0 | 1 | 5 | 0 | 0 | 0 | 23 | | ded | Dedua | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| deu | German | Indo-European | 6 | 14 | 7 | 0 | 1 | 6 | 2 | 18 | 4 | 0 | 0 | 58 | +| deu | German | Indo-European | 6 | 14 | 7 | 0 | 1 | 7 | 2 | 18 | 4 | 0 | 0 | 59 | | dgc | Casiguran Dumagat Agta | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dgr | Dogrib | Athabaskan-Eyak-Tlingit | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dgz | Daga | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -903,7 +930,7 @@ The following tables give you an overview of the tasks in MTEB. | ell | Modern Greek (1453-) | Indo-European | 3 | 6 | 1 | 0 | 1 | 2 | 0 | 3 | 0 | 0 | 0 | 16 | | emi | Mussau-Emira | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | emp | Northern Emberá | Chocoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| eng | English | Indo-European | 16 | 143 | 16 | 3 | 1 | 8 | 8 | 105 | 13 | 2 | 1 | 316 | +| eng | English | Indo-European | 17 | 160 | 18 | 3 | 1 | 13 | 8 | 107 | 13 | 2 | 1 | 343 | | enq | Enga | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | epo | Esperanto | Artificial Language | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | eri | Ogea | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -924,7 +951,7 @@ The following tables give you an overview of the tasks in MTEB. | fin | Finnish | Uralic | 3 | 5 | 1 | 0 | 1 | 1 | 2 | 5 | 1 | 0 | 0 | 19 | | fon | Fon | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | for | Fore | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| fra | French | Indo-European | 7 | 13 | 8 | 0 | 1 | 5 | 3 | 15 | 4 | 0 | 1 | 57 | +| fra | French | Indo-European | 7 | 13 | 8 | 0 | 1 | 6 | 3 | 15 | 4 | 0 | 1 | 58 | | fry | Western Frisian | Indo-European | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fuc | Pulaar | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fue | Borgu Fulfulde | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -981,7 +1008,7 @@ The following tables give you an overview of the tasks in MTEB. | hch | Huichol | Uto-Aztecan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | heb | Hebrew | Afro-Asiatic | 4 | 5 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 11 | | heg | Helong | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| hin | Hindi | Indo-European | 9 | 12 | 2 | 0 | 0 | 1 | 2 | 10 | 2 | 0 | 0 | 38 | +| hin | Hindi | Indo-European | 9 | 12 | 2 | 0 | 0 | 2 | 2 | 10 | 2 | 0 | 0 | 39 | | hix | Hixkaryána | Cariban | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hla | Halia | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hlt | Matu Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1030,7 +1057,7 @@ The following tables give you an overview of the tasks in MTEB. | jid | Bu (Kaduna State) | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jiv | Shuar | Chicham | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jni | Janji | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| jpn | Japanese | Japonic | 5 | 8 | 3 | 0 | 0 | 1 | 3 | 13 | 2 | 0 | 0 | 35 | +| jpn | Japanese | Japonic | 5 | 8 | 3 | 0 | 0 | 2 | 3 | 13 | 2 | 0 | 0 | 36 | | jvn | Caribbean Javanese | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kab | Kabyle | Afro-Asiatic | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kac | Kachin | Sino-Tibetan | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | @@ -1086,7 +1113,7 @@ The following tables give you an overview of the tasks in MTEB. | knj | Western Kanjobal | Mayan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | knv | Tabo | Unclassified | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kon | Kongo | Unclassified | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | -| kor | Korean | Koreanic | 4 | 8 | 1 | 0 | 1 | 2 | 1 | 9 | 3 | 0 | 0 | 29 | +| kor | Korean | Koreanic | 4 | 8 | 1 | 0 | 1 | 3 | 1 | 9 | 3 | 0 | 0 | 30 | | kos | Kosraean | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpf | Komba | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpg | Kapingamarangi | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1231,7 +1258,7 @@ The following tables give you an overview of the tasks in MTEB. | mqb | Mbuko | Afro-Asiatic | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mqj | Mamasa | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mri | Maori | Austronesian | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | -| msa | Malay (macrolanguage) | Unclassified | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | +| msa | Malay (macrolanguage) | Unclassified | 1 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 4 | | msb | Masbatenyo | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msc | Sankaran Maninka | Mande | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msk | Mansaka | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1292,7 +1319,7 @@ The following tables give you an overview of the tasks in MTEB. | nij | Ngaju | Austronesian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | nin | Ninzo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nko | Nkonya | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| nld | Dutch | Indo-European | 6 | 6 | 1 | 0 | 1 | 0 | 1 | 2 | 2 | 0 | 0 | 19 | +| nld | Dutch | Indo-European | 6 | 6 | 1 | 0 | 1 | 1 | 1 | 2 | 2 | 0 | 0 | 20 | | nlg | Gela | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nna | Nyangumarta | Pama-Nyungan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nno | Norwegian Nynorsk | Unclassified | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | @@ -1364,7 +1391,7 @@ The following tables give you an overview of the tasks in MTEB. | poi | Highland Popoluca | Mixe-Zoque | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pol | Polish | Indo-European | 4 | 11 | 4 | 0 | 1 | 4 | 0 | 18 | 4 | 0 | 0 | 46 | | pon | Pohnpeian | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| por | Portuguese | Indo-European | 4 | 9 | 1 | 0 | 2 | 2 | 1 | 5 | 3 | 0 | 0 | 27 | +| por | Portuguese | Indo-European | 4 | 9 | 1 | 0 | 2 | 3 | 1 | 5 | 3 | 0 | 0 | 28 | | poy | Pogolo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ppo | Folopa | Teberan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | prf | Paranan | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1449,7 +1476,7 @@ The following tables give you an overview of the tasks in MTEB. | soq | Kanasi | Dagan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sot | Southern Sotho | Atlantic-Congo | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 5 | | soy | Miyobe | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| spa | Spanish | Indo-European | 4 | 13 | 4 | 0 | 1 | 2 | 2 | 13 | 4 | 0 | 0 | 43 | +| spa | Spanish | Indo-European | 4 | 13 | 4 | 0 | 1 | 3 | 2 | 13 | 4 | 0 | 0 | 44 | | spl | Selepet | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spm | Akukem | Ramu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spp | Supyire Senoufo | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1546,7 +1573,7 @@ The following tables give you an overview of the tasks in MTEB. | tuk | Turkmen | Turkic | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | tum | Tumbuka | Atlantic-Congo | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | tuo | Tucano | Tucanoan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| tur | Turkish | Turkic | 4 | 7 | 1 | 0 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 19 | +| tur | Turkish | Turkic | 4 | 7 | 1 | 0 | 0 | 3 | 0 | 3 | 2 | 0 | 0 | 20 | | tvk | Southeast Ambrym | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | twi | Twi | Unclassified | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | txq | Tii | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1656,7 +1683,7 @@ The following tables give you an overview of the tasks in MTEB. | zaw | Mitla Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zca | Coatecas Altas Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zga | Kinga | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| zho | Chinese | Unclassified | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 13 | 0 | 0 | 0 | 20 | +| zho | Chinese | Unclassified | 2 | 2 | 1 | 0 | 0 | 2 | 1 | 13 | 0 | 0 | 0 | 21 | | zia | Zia | Nuclear Trans New Guinea | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ziw | Zigula | Atlantic-Congo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zlm | Malay (individual language) | Austronesian | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1675,7 +1702,7 @@ The following tables give you an overview of the tasks in MTEB. | zty | Yatee Zapotec | Otomanguean | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zul | Zulu | Atlantic-Congo | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 7 | | zyp | Zyphe Chin | Sino-Tibetan | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | -| Total | None | None | None | 1394 | 795 | 304 | 3 | 28 | 67 | 51 | 473 | 85 | 2 | 2 | +| Total | None | None | None | 1395 | 812 | 306 | 3 | 28 | 84 | 51 | 475 | 85 | 2 | 2 | diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 07c4f97a04..b04753d877 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -70,6 +70,7 @@ "Web", "Written", "Programming", + "Chemistry", ] SAMPLE_CREATION_METHOD = Literal[ diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py index 233c7a79b3..415b11eddb 100644 --- a/mteb/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks.py @@ -1232,3 +1232,47 @@ def load_results( primaryClass={cs.CL} }""", ) + +CHEMTEB = Benchmark( + name="ChemTEB", + tasks=get_tasks( + tasks=[ + "PubChemSMILESBitextMining", + "SDSEyeProtectionClassification", + "SDSGlovesClassification", + "WikipediaBioMetChemClassification", + "WikipediaGreenhouseEnantiopureClassification", + "WikipediaSolidStateColloidalClassification", + "WikipediaOrganicInorganicClassification", + "WikipediaCryobiologySeparationClassification", + "WikipediaChemistryTopicsClassification", + "WikipediaTheoreticalAppliedClassification", + "WikipediaChemFieldsClassification", + "WikipediaLuminescenceClassification", + "WikipediaIsotopesFissionClassification", + "WikipediaSaltsSemiconductorsClassification", + "WikipediaBiolumNeurochemClassification", + "WikipediaCrystallographyAnalyticalClassification", + "WikipediaCompChemSpectroscopyClassification", + "WikipediaChemEngSpecialtiesClassification", + "WikipediaChemistryTopicsClustering", + "WikipediaSpecialtiesInChemistryClustering", + "PubChemAISentenceParaphrasePC", + "PubChemSMILESPC", + "PubChemSynonymPC", + "PubChemWikiParagraphsPC", + "PubChemWikiPairClassification", + "ChemNQRetrieval", + "ChemHotpotQARetrieval", + ], + ), + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} +}""", +) diff --git a/mteb/models/bedrock_models.py b/mteb/models/bedrock_models.py new file mode 100644 index 0000000000..4616209df1 --- /dev/null +++ b/mteb/models/bedrock_models.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +import json +import logging +import re +from functools import partial +from typing import Any + +import numpy as np +import tqdm + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta +from mteb.models.cohere_models import model_prompts as cohere_model_prompts +from mteb.models.cohere_models import supported_languages as cohere_supported_languages +from mteb.requires_package import requires_package + +from .wrapper import Wrapper + +logger = logging.getLogger(__name__) + + +class BedrockWrapper(Wrapper): + def __init__( + self, + model_id: str, + provider: str, + max_tokens: int, + model_prompts: dict[str, str] | None = None, + **kwargs, + ) -> None: + requires_package(self, "boto3", "The AWS SDK for Python") + import boto3 + + boto3_session = boto3.session.Session() + region_name = boto3_session.region_name + self._client = boto3.client("bedrock-runtime", region_name) + + self._model_id = model_id + self._provider = provider.lower() + + if self._provider == "cohere": + self.model_prompts = ( + self.validate_task_to_prompt_name(model_prompts) + if model_prompts + else None + ) + self._max_batch_size = 96 + self._max_sequence_length = max_tokens * 4 + else: + self._max_tokens = max_tokens + + def encode( + self, + sentences: list[str], + *, + task_name: str | None = None, + prompt_type: PromptType | None = None, + **kwargs: Any, + ) -> np.ndarray: + requires_package(self, "boto3", "Amazon Bedrock") + show_progress_bar = ( + False + if "show_progress_bar" not in kwargs + else kwargs.pop("show_progress_bar") + ) + if self._provider == "amazon": + return self._encode_amazon(sentences, show_progress_bar) + elif self._provider == "cohere": + prompt_name = self.get_prompt_name( + self.model_prompts, task_name, prompt_type + ) + cohere_task_type = self.model_prompts.get(prompt_name, "search_document") + return self._encode_cohere(sentences, cohere_task_type, show_progress_bar) + else: + raise ValueError( + f"Unknown provider '{self._provider}'. Must be 'amazon' or 'cohere'." + ) + + def _encode_amazon( + self, sentences: list[str], show_progress_bar: bool = False + ) -> np.ndarray: + from botocore.exceptions import ValidationError + + all_embeddings = [] + # https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html + max_sequence_length = int(self._max_tokens * 4.5) + + for sentence in tqdm.tqdm( + sentences, leave=False, disable=not show_progress_bar + ): + if len(sentence) > max_sequence_length: + truncated_sentence = sentence[:max_sequence_length] + else: + truncated_sentence = sentence + + try: + embedding = self._embed_amazon(truncated_sentence) + all_embeddings.append(embedding) + + except ValidationError as e: + error_str = str(e) + pattern = r"request input token count:\s*(\d+)" + match = re.search(pattern, error_str) + if match: + num_tokens = int(match.group(1)) + + ratio = 0.9 * (self._max_tokens / num_tokens) + dynamic_cutoff = int(len(truncated_sentence) * ratio) + + embedding = self._embed_amazon(truncated_sentence[:dynamic_cutoff]) + all_embeddings.append(embedding) + else: + raise e + + return np.array(all_embeddings) + + def _encode_cohere( + self, + sentences: list[str], + cohere_task_type: str, + show_progress_bar: bool = False, + ) -> np.ndarray: + batches = [ + sentences[i : i + self._max_batch_size] + for i in range(0, len(sentences), self._max_batch_size) + ] + + all_embeddings = [] + + for batch in tqdm.tqdm(batches, leave=False, disable=not show_progress_bar): + response = self._client.invoke_model( + body=json.dumps( + { + "texts": [sent[: self._max_sequence_length] for sent in batch], + "input_type": cohere_task_type, + } + ), + modelId=self._model_id, + accept="*/*", + contentType="application/json", + ) + all_embeddings.extend(self._to_numpy(response)) + + return np.array(all_embeddings) + + def _embed_amazon(self, sentence: str) -> np.ndarray: + response = self._client.invoke_model( + body=json.dumps({"inputText": sentence}), + modelId=self._model_id, + accept="application/json", + contentType="application/json", + ) + return self._to_numpy(response) + + def _to_numpy(self, embedding_response) -> np.ndarray: + response = json.loads(embedding_response.get("body").read()) + key = "embedding" if self._provider == "amazon" else "embeddings" + return np.array(response[key]) + + +amazon_titan_embed_text_v1 = ModelMeta( + name="bedrock/amazon-titan-embed-text-v1", + revision="1", + release_date="2023-09-27", + languages=None, # not specified + loader=partial( + BedrockWrapper, + model_id="amazon.titan-embed-text-v1", + provider="amazon", + max_tokens=8192, + ), + max_tokens=8192, + embed_dim=1536, + open_weights=False, + n_parameters=None, + public_training_code=None, + public_training_data=None, # assumed + training_datasets=None, + license=None, + reference="https://aws.amazon.com/about-aws/whats-new/2023/09/amazon-titan-embeddings-generally-available/", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) + +amazon_titan_embed_text_v2 = ModelMeta( + name="bedrock/amazon-titan-embed-text-v2", + revision="1", + release_date="2024-04-30", + languages=None, # not specified + loader=partial( + BedrockWrapper, + model_id="amazon.titan-embed-text-v2:0", + provider="amazon", + max_tokens=8192, + ), + max_tokens=8192, + embed_dim=1024, + open_weights=False, + n_parameters=None, + public_training_code=None, + public_training_data=None, # assumed + training_datasets=None, + license=None, + reference="https://aws.amazon.com/about-aws/whats-new/2024/04/amazon-titan-text-embeddings-v2-amazon-bedrock/", + similarity_fn_name="cosine", + framework=["API"], + use_instructions=False, +) +# Note: For the original Cohere API implementation, refer to: +# https://github.com/embeddings-benchmark/mteb/blob/main/mteb/models/cohere_models.py +# This implementation uses the Amazon Bedrock endpoint for Cohere models. +cohere_embed_english_v3 = ModelMeta( + loader=partial( + BedrockWrapper, + model_id="cohere.embed-english-v3", + provider="cohere", + max_tokens=512, + model_prompts=cohere_model_prompts, + ), + name="bedrock/cohere-embed-english-v3", + languages=["eng-Latn"], + open_weights=False, + reference="https://cohere.com/blog/introducing-embed-v3", + revision="1", + release_date="2023-11-02", + n_parameters=None, + public_training_code=None, + public_training_data=None, # assumed + training_datasets=None, + max_tokens=512, + embed_dim=1024, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=True, +) + +cohere_embed_multilingual_v3 = ModelMeta( + loader=partial( + BedrockWrapper, + model_id="cohere.embed-multilingual-v3", + provider="cohere", + max_tokens=512, + model_prompts=cohere_model_prompts, + ), + name="bedrock/cohere-embed-multilingual-v3", + languages=cohere_supported_languages, + open_weights=False, + reference="https://cohere.com/blog/introducing-embed-v3", + revision="1", + release_date="2023-11-02", + n_parameters=None, + public_training_code=None, + public_training_data=None, # assumed + training_datasets=None, + max_tokens=512, + embed_dim=1024, + license=None, + similarity_fn_name="cosine", + framework=["API"], + use_instructions=True, +) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index ccc8fbdda4..399d8359c5 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -13,6 +13,7 @@ from mteb.model_meta import ModelMeta from mteb.models import ( arctic_models, + bedrock_models, bge_models, bm25, cde_models, @@ -100,6 +101,7 @@ uae_models, text2vec_models, stella_models, + bedrock_models, uae_models, voyage_models, ] diff --git a/mteb/tasks/BitextMining/__init__.py b/mteb/tasks/BitextMining/__init__.py index c176077215..1cec5d5ddc 100644 --- a/mteb/tasks/BitextMining/__init__.py +++ b/mteb/tasks/BitextMining/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations from .dan.BornholmskBitextMining import * +from .eng.PubChemSMILESBitextMining import * from .kat.TbilisiCityHallBitextMining import * from .multilingual.BibleNLPBitextMining import * from .multilingual.BUCCBitextMining import * diff --git a/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py b/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py new file mode 100644 index 0000000000..4951d8c596 --- /dev/null +++ b/mteb/tasks/BitextMining/eng/PubChemSMILESBitextMining.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskBitextMining import AbsTaskBitextMining +from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata + +COL_MAPPING = { + "iso-title": {"title": "sentence1", "isomeric_smiles": "sentence2"}, + "iso-desc": {"description": "sentence1", "isomeric_smiles": "sentence2"}, + "canon-title": {"title": "sentence1", "canonical_smiles": "sentence2"}, + "canon-desc": {"description": "sentence1", "canonical_smiles": "sentence2"}, +} + +EVAL_LANGS = { + "iso-title": ["eng-Latn", "eng-Latn"], + "iso-desc": ["eng-Latn", "eng-Latn"], + "canon-title": ["eng-Latn", "eng-Latn"], + "canon-desc": ["eng-Latn", "eng-Latn"], +} + + +class PubChemSMILESBitextMining(MultilingualTask, AbsTaskBitextMining): + metadata = TaskMetadata( + name="PubChemSMILESBitextMining", + dataset={ + "path": "BASF-AI/PubChemSMILESBitextMining", + "revision": "36700ea628118312ebf2f90ad2353a9a8f188dc9", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + type="BitextMining", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=EVAL_LANGS, + main_score="f1", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + for subset in self.hf_subsets: + self.dataset[subset] = self.dataset[subset].rename_columns( + COL_MAPPING[subset] + ) diff --git a/mteb/tasks/BitextMining/eng/__init__.py b/mteb/tasks/BitextMining/eng/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py index 3e80ae2181..12b0623b6b 100644 --- a/mteb/tasks/Classification/__init__.py +++ b/mteb/tasks/Classification/__init__.py @@ -33,10 +33,27 @@ from .eng.NewsClassification import * from .eng.PatentClassification import * from .eng.PoemSentimentClassification import * +from .eng.SDSEyeProtectionClassification import * +from .eng.SDSGlovesClassification import * from .eng.ToxicChatClassification import * from .eng.ToxicConversationsClassification import * from .eng.TweetSentimentExtractionClassification import * from .eng.TweetTopicSingleClassification import * +from .eng.WikipediaBiolumNeurochemClassification import * +from .eng.WikipediaBioMetChemClassification import * +from .eng.WikipediaChemEngSpecialtiesClassification import * +from .eng.WikipediaChemFieldsClassification import * +from .eng.WikipediaChemistryTopicsClassification import * +from .eng.WikipediaCompChemSpectroscopyClassification import * +from .eng.WikipediaCryobiologySeparationClassification import * +from .eng.WikipediaCrystallographyAnalyticalClassification import * +from .eng.WikipediaGreenhouseEnantiopureClassification import * +from .eng.WikipediaIsotopesFissionClassification import * +from .eng.WikipediaLuminescenceClassification import * +from .eng.WikipediaOrganicInorganicClassification import * +from .eng.WikipediaSaltsSemiconductorsClassification import * +from .eng.WikipediaSolidStateColloidalClassification import * +from .eng.WikipediaTheoreticalAppliedClassification import * from .eng.YahooAnswersTopicsClassification import * from .eng.YelpReviewFullClassification import * from .est.estonian_valence import * diff --git a/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py b/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py new file mode 100644 index 0000000000..197060ba0c --- /dev/null +++ b/mteb/tasks/Classification/eng/SDSEyeProtectionClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SDSEyeProtectionClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="SDSEyeProtectionClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/SDSEyeProtectionClassification", + "revision": "35cbe5ee544dd26e343238a333de4568e6f77819", + }, + type="Classification", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="LM-generated and reviewed", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @inproceedings{pereira2020msds, + title={MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, + author={Pereira, Eliseu}, + booktitle={15th Doctoral Symposium}, + pages={42}, + year={2020} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/SDSGlovesClassification.py b/mteb/tasks/Classification/eng/SDSGlovesClassification.py new file mode 100644 index 0000000000..ac471d58e9 --- /dev/null +++ b/mteb/tasks/Classification/eng/SDSGlovesClassification.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class SDSGlovesClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="SDSGlovesClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/SDSGlovesClassification", + "revision": "c723236c5ec417d79512e6104aca9d2cd88168f6", + }, + type="Classification", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="LM-generated and reviewed", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @inproceedings{pereira2020msds, + title={MSDS-OPP: Operator Procedures Prediction in Material Safety Data Sheets}, + author={Pereira, Eliseu}, + booktitle={15th Doctoral Symposium}, + pages={42}, + year={2020} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py new file mode 100644 index 0000000000..3b494f46f6 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaBioMetChemClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaBioMetChemClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaBioMetChemClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy2GeneExpressionVsMetallurgyClassification", + "revision": "6ac491e5de9070c6dd434b31e76d3d379123dcff", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py b/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py new file mode 100644 index 0000000000..623ec8fc66 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaBiolumNeurochemClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaBiolumNeurochemClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaBiolumNeurochemClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaMedium2BioluminescenceVsNeurochemistryClassification", + "revision": "2f68b7d34c2be896e46b14533573b366e59e5aae", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py b/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py new file mode 100644 index 0000000000..c95abcd4f2 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaChemEngSpecialtiesClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemEngSpecialtiesClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaChemEngSpecialtiesClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaMedium5Classification", + "revision": "f81a76a2fb690e5d5bd7a26dd07e85cdf8405dfb", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py b/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py new file mode 100644 index 0000000000..7c0179fb1e --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaChemFieldsClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemFieldsClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaChemFieldsClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEZ10Classification", + "revision": "a75fae77759acc115f015f2b856baa47776d733d", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py b/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py new file mode 100644 index 0000000000..02751b1a32 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaChemistryTopicsClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemistryTopicsClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaChemistryTopicsClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy10Classification", + "revision": "d8fb355db2248f95df8ea410a43aa1db1ee96ba4", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py b/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py new file mode 100644 index 0000000000..28a42ac044 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaCompChemSpectroscopyClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaCompChemSpectroscopyClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaCompChemSpectroscopyClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaMedium2ComputationalVsSpectroscopistsClassification", + "revision": "474d706a22b0451b5846d623aa4b4234ba5b0513", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py b/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py new file mode 100644 index 0000000000..0e01454298 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaCryobiologySeparationClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaCryobiologySeparationClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaCryobiologySeparationClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy5Classification", + "revision": "858633e882dadd1ec6a0d220f7549bcafd379236", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py b/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py new file mode 100644 index 0000000000..724ffc4249 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaCrystallographyAnalyticalClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaCrystallographyAnalyticalClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaCrystallographyAnalyticalClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaMedium2CrystallographyVsChromatographyTitrationpHClassification", + "revision": "740565a6a853aaed1114a13bdfd5fd46857b4f11", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py b/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py new file mode 100644 index 0000000000..b701584a70 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaGreenhouseEnantiopureClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaGreenhouseEnantiopureClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaGreenhouseEnantiopureClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy2GreenhouseVsEnantiopureClassification", + "revision": "0cfc1a83b6ed832454e8f4f93f7a0e26208274d9", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py b/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py new file mode 100644 index 0000000000..252ad85ed9 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaIsotopesFissionClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaIsotopesFissionClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaIsotopesFissionClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaHard2IsotopesVsFissionProductsNuclearFissionClassification", + "revision": "897743346c7c794264f7dbfadc3978aa2895e8e2", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py b/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py new file mode 100644 index 0000000000..8e115b59d4 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaLuminescenceClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaLuminescenceClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaLuminescenceClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaHard2BioluminescenceVsLuminescenceClassification", + "revision": "21c4dcebe2c5b36a35292e6441e7a10b59bf4896", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py b/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py new file mode 100644 index 0000000000..0ad784b69b --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaOrganicInorganicClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaOrganicInorganicClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaOrganicInorganicClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy2SpecialClassification", + "revision": "96d1d9b37c4693f74c46c83d63a290573f78d511", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py b/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py new file mode 100644 index 0000000000..a409f87c8d --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaSaltsSemiconductorsClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaSaltsSemiconductorsClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaSaltsSemiconductorsClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaHard2SaltsVsSemiconductorMaterialsClassification", + "revision": "9e5415a096012fa2d1f3a929952cf9859e4550e7", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py b/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py new file mode 100644 index 0000000000..43f95c50f3 --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaSolidStateColloidalClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaSolidStateColloidalClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaSolidStateColloidalClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy2SolidStateVsColloidalClassification", + "revision": "7d8df44e588b6143d4856c781f72f919fa0599a7", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py b/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py new file mode 100644 index 0000000000..f33b02f4bb --- /dev/null +++ b/mteb/tasks/Classification/eng/WikipediaTheoreticalAppliedClassification.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClassification import AbsTaskClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaTheoreticalAppliedClassification(AbsTaskClassification): + metadata = TaskMetadata( + name="WikipediaTheoreticalAppliedClassification", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEZ2Classification", + "revision": "7896906653d31d7102a143d7f55d67cd688e3147", + }, + type="Classification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="accuracy", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Clustering/__init__.py b/mteb/tasks/Clustering/__init__.py index 014796a4cb..de27839290 100644 --- a/mteb/tasks/Clustering/__init__.py +++ b/mteb/tasks/Clustering/__init__.py @@ -18,6 +18,8 @@ from .eng.StackExchangeClusteringP2P import * from .eng.TwentyNewsgroupsClustering import * from .eng.WikiCitiesClustering import * +from .eng.WikipediaChemistrySpecialtiesClustering import * +from .eng.WikipediaChemistryTopicsClustering import * from .fra.AlloProfClusteringP2P import * from .fra.AlloProfClusteringS2S import * from .fra.HALClusteringS2S import * diff --git a/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py b/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py new file mode 100644 index 0000000000..a4e4082a69 --- /dev/null +++ b/mteb/tasks/Clustering/eng/WikipediaChemistrySpecialtiesClustering.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemistrySpecialtiesClustering(AbsTaskClustering): + metadata = TaskMetadata( + name="WikipediaSpecialtiesInChemistryClustering", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaMedium5Clustering", + "revision": "7754d8d296f9f4c3af1c6426fab36304730ccddf", + }, + type="Clustering", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="v_measure", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py b/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py new file mode 100644 index 0000000000..bfa5e1fcf3 --- /dev/null +++ b/mteb/tasks/Clustering/eng/WikipediaChemistryTopicsClustering.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskClustering import AbsTaskClustering +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class WikipediaChemistryTopicsClustering(AbsTaskClustering): + metadata = TaskMetadata( + name="WikipediaChemistryTopicsClustering", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/WikipediaEasy10Clustering", + "revision": "0a0886b06acbfc735bca6a71b21ce1e5cb92a37b", + }, + type="Clustering", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="v_measure", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + """, + ) diff --git a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py index c2057a4952..e229195df0 100644 --- a/mteb/tasks/PairClassification/__init__.py +++ b/mteb/tasks/PairClassification/__init__.py @@ -4,6 +4,10 @@ from .ces.CTKFactsNLI import * from .deu.FalseFriendsDeEnPC import * from .eng.LegalBenchPC import * +from .eng.PubChemAISentenceParaphrasePC import * +from .eng.PubChemSMILESPC import * +from .eng.PubChemSynonymPC import * +from .eng.PubChemWikiParagraphsPC import * from .eng.SprintDuplicateQuestionsPC import * from .eng.TwitterSemEval2015PC import * from .eng.TwitterURLCorpusPC import * @@ -13,6 +17,7 @@ from .kor.KlueNLI import * from .multilingual.OpusparcusPC import * from .multilingual.PawsXPairClassification import * +from .multilingual.PubChemWikiPairClassification import * from .multilingual.RTE3 import * from .multilingual.XNLI import * from .multilingual.XStance import * diff --git a/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py b/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py new file mode 100644 index 0000000000..f453ebee31 --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemAISentenceParaphrasePC.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class PubChemAISentenceParaphrasePC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemAISentenceParaphrasePC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemAISentenceParaphrasePC", + "revision": "f33a205966ce032f957c3a22f4f9e378f89a2c56", + }, + type="PairClassification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="LM-generated", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + _dataset = {} + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["sent1"], + "sentence2": hf_dataset["sent2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py b/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py new file mode 100644 index 0000000000..b3e297e043 --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemSMILESPC.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import datasets + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + +_DATASET_COLUMN_MAP = [ + { + "name": "iso-desc", + "sent1": "description", + "sent2": "isomeric_smiles", + "labels": "labels", + }, + { + "name": "iso-title", + "sent1": "title", + "sent2": "isomeric_smiles", + "labels": "labels", + }, + { + "name": "canon-desc", + "sent1": "description", + "sent2": "canonical_smiles", + "labels": "labels", + }, + { + "name": "canon-title", + "sent1": "title", + "sent2": "canonical_smiles", + "labels": "labels", + }, +] + + +class PubChemSMILESPC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemSMILESPC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemSMILESPairClassification", + "revision": "7ba40b69f5fe6ffe4cc189aac9e1710913c73c8a", + }, + type="PairClassification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def load_data(self): + """Load dataset from HuggingFace hub""" + if self.data_loaded: + return + + _hf_dataset = None + for dataset_col_map in _DATASET_COLUMN_MAP: + _dataset = datasets.load_dataset( + self.metadata.dataset["path"], + dataset_col_map["name"], + revision=self.metadata.dataset["revision"], + ) + + _dataset = _dataset.rename_columns( + { + dataset_col_map["sent1"]: "sentence1", + dataset_col_map["sent2"]: "sentence2", + dataset_col_map["labels"]: "labels", + } + ) + + if _hf_dataset is None: + _hf_dataset = _dataset + else: + _hf_dataset["test"] = datasets.concatenate_datasets( + [_hf_dataset["test"], _dataset["test"]] + ) + + self.dataset = _hf_dataset + self.dataset_transform() + self.data_loaded = True + + def dataset_transform(self): + self.dataset = self.stratified_subsampling( + self.dataset, + seed=self.seed, + splits=self.metadata["eval_splits"], + label="labels", + ) + + _dataset = {} + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["sentence1"], + "sentence2": hf_dataset["sentence2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py b/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py new file mode 100644 index 0000000000..6b6dfd81c8 --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemSynonymPC.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class PubChemSynonymPC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemSynonymPC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemSynonymPC", + "revision": "5037d69d177c9628fb79cb57eea1299178b28c1b", + }, + type="PairClassification", + category="s2s", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + _dataset = {} + + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["title"], + "sentence2": hf_dataset["synonyms"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py b/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py new file mode 100644 index 0000000000..679580f28c --- /dev/null +++ b/mteb/tasks/PairClassification/eng/PubChemWikiParagraphsPC.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class PubChemWikiParagraphsPC(AbsTaskPairClassification): + metadata = TaskMetadata( + name="PubChemWikiParagraphsPC", + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + dataset={ + "path": "BASF-AI/PubChemWikiParagraphsPC", + "revision": "7fb14716e4106b72f51a16e682e5cd2d67e9bd70", + }, + type="PairClassification", + category="p2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self): + _dataset = {} + for split in self.metadata.eval_splits: + hf_dataset = self.dataset[split] + _dataset[split] = [ + { + "sentence1": hf_dataset["sent1"], + "sentence2": hf_dataset["sent2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py b/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py new file mode 100644 index 0000000000..59a0605a82 --- /dev/null +++ b/mteb/tasks/PairClassification/multilingual/PubChemWikiPairClassification.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification +from mteb.abstasks.MultilingualTask import MultilingualTask +from mteb.abstasks.TaskMetadata import TaskMetadata + +_LANGUAGES = { + "de": ["deu-Latn", "eng-Latn"], + "nl": ["nld-Latn", "eng-Latn"], + "zh": ["zho-Hans", "eng-Latn"], + "fr": ["fra-Latn", "eng-Latn"], + "es": ["spa-Latn", "eng-Latn"], + "pt": ["por-Latn", "eng-Latn"], + "ms": ["msa-Latn", "eng-Latn"], + "ko": ["kor-Hang", "eng-Latn"], + "tr": ["tur-Latn", "eng-Latn"], + "hi": ["hin-Deva", "eng-Latn"], + "cs": ["ces-Latn", "eng-Latn"], + "ja": ["jpn-Jpan", "eng-Latn"], +} + + +class PubChemWikiPairClassification(AbsTaskPairClassification, MultilingualTask): + metadata = TaskMetadata( + name="PubChemWikiPairClassification", + dataset={ + "path": "BASF-AI/PubChemWikiMultilingualPC", + "revision": "3412b208896a37e4ebb5ff7b96f6cc313ee9d2e3", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + category="s2s", + modalities=["text"], + type="PairClassification", + eval_splits=["test"], + eval_langs=_LANGUAGES, + main_score="max_ap", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="created", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{kim2023pubchem, + title={PubChem 2023 update}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun and Gindulyte, Asta and He, Jia and He, Siqian and Li, Qingliang and Shoemaker, Benjamin A and Thiessen, Paul A and Yu, Bo and others}, + journal={Nucleic acids research}, + volume={51}, + number={D1}, + pages={D1373--D1380}, + year={2023}, + publisher={Oxford University Press} + } + """, + ) + + def dataset_transform(self) -> None: + _dataset = {} + for lang in self.hf_subsets: + _dataset[lang] = {} + hf_dataset = self.dataset[lang][self.metadata.eval_splits[0]] + _dataset[lang]["test"] = [ + { + "sentence1": hf_dataset["sent1"], + "sentence2": hf_dataset["sent2"], + "labels": hf_dataset["labels"], + } + ] + self.dataset = _dataset diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index d83df7ec5e..6c146379ea 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -29,6 +29,8 @@ from .eng.ARCChallengeRetrieval import * from .eng.ArguAnaRetrieval import * from .eng.BrightRetrieval import * +from .eng.ChemHotpotQARetrieval import * +from .eng.ChemNQRetrieval import * from .eng.ClimateFEVERRetrieval import * from .eng.CQADupstackAndroidRetrieval import * from .eng.CQADupstackEnglishRetrieval import * diff --git a/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py b/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py new file mode 100644 index 0000000000..88fbc50df4 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/ChemHotpotQARetrieval.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ChemHotpotQARetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="ChemHotpotQARetrieval", + dataset={ + "path": "BASF-AI/ChemHotpotQARetrieval", + "revision": "1840e8a5ac6ec752bbdd97d543ead0189bc7c25b", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["train", "dev", "test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @inproceedings{yang-etal-2018-hotpotqa, + title = "{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering", + author = "Yang, Zhilin and + Qi, Peng and + Zhang, Saizheng and + Bengio, Yoshua and + Cohen, William and + Salakhutdinov, Ruslan and + Manning, Christopher D.", + editor = "Riloff, Ellen and + Chiang, David and + Hockenmaier, Julia and + Tsujii, Jun{'}ichi", + booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing", + month = oct # "-" # nov, + year = "2018", + address = "Brussels, Belgium", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/D18-1259", + doi = "10.18653/v1/D18-1259", + pages = "2369--2380", + abstract = "Existing question answering (QA) datasets fail to train QA systems to perform complex reasoning and provide explanations for answers. We introduce HotpotQA, a new dataset with 113k Wikipedia-based question-answer pairs with four key features: (1) the questions require finding and reasoning over multiple supporting documents to answer; (2) the questions are diverse and not constrained to any pre-existing knowledge bases or knowledge schemas; (3) we provide sentence-level supporting facts required for reasoning, allowing QA systems to reason with strong supervision and explain the predictions; (4) we offer a new type of factoid comparison questions to test QA systems{'} ability to extract relevant facts and perform necessary comparison. We show that HotpotQA is challenging for the latest QA systems, and the supporting facts enable models to improve performance and make explainable predictions.", + } +""", + ) diff --git a/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py b/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py new file mode 100644 index 0000000000..1e77971331 --- /dev/null +++ b/mteb/tasks/Retrieval/eng/ChemNQRetrieval.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + + +class ChemNQRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="ChemNQRetrieval", + dataset={ + "path": "BASF-AI/ChemNQRetrieval", + "revision": "5d958fb6b31055495347724d46431ba41309b03a", + }, + description="ChemTEB evaluates the performance of text embedding models on chemical domain data.", + reference="https://arxiv.org/abs/2412.00532", + type="Retrieval", + category="s2p", + modalities=["text"], + eval_splits=["test"], + eval_langs=["eng-Latn"], + main_score="ndcg_at_10", + date=("2024-06-01", "2024-11-30"), + domains=["Chemistry"], + task_subtypes=[], + license="cc-by-nc-sa-4.0", + annotations_creators="derived", + dialect=[], + sample_creation="found", + bibtex_citation=""" + @article{kasmaee2024chemteb, + title={ChemTEB: Chemical Text Embedding Benchmark, an Overview of Embedding Models Performance \& Efficiency on a Specific Domain}, + author={Kasmaee, Ali Shiraee and Khodadad, Mohammad and Saloot, Mohammad Arshi and Sherck, Nick and Dokas, Stephen and Mahyar, Hamidreza and Samiee, Soheila}, + journal={arXiv preprint arXiv:2412.00532}, + year={2024} + } + @article{47761, + title = {Natural Questions: a Benchmark for Question Answering Research}, + author = {Tom Kwiatkowski and Jennimaria Palomaki and Olivia Redfield and Michael Collins and Ankur Parikh + and Chris Alberti and Danielle Epstein and Illia Polosukhin and Matthew Kelcey and Jacob Devlin and Kenton Lee + and Kristina N. Toutanova and Llion Jones and Ming-Wei Chang and Andrew Dai and Jakob Uszkoreit and Quoc Le + and Slav Petrov}, + year = {2019}, + journal = {Transactions of the Association of Computational Linguistics}} + """, + )