Skip to content

Commit

Permalink
disable broken trec-cast datasets (for now)
Browse files Browse the repository at this point in the history
  • Loading branch information
seanmacavaney committed Nov 8, 2024
1 parent 2ada646 commit 74e40f5
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions ir_datasets/datasets/trec_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ def register_docs(namespace: str, use_docs: bool, *tuples):
subsets[f"{namespace}/passages"] = PrefixedDocs(f"{NAME}/passages_{namespace}", *all_passages_spec)
return all_docs

"""
docs_v2 = register_docs(
"v2",
True,
Expand Down Expand Up @@ -703,18 +704,20 @@ def register_docs(namespace: str, use_docs: bool, *tuples):
),
("kilt", "KILT_", KiltCastDocs("kilt"), 17_124_025),
)
"""

subsets["v2/2021"] = Dataset(
docs_v2,
CastQueries(dlc["2021/queries"], Cast2021Query),
TrecQrels(dlc["2021/qrels"], QRELS_DEFS),
)
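# NOTE: the "v2/2021" subset is disabled for now because the dataset is broken;
# its document collection (docs_v2) is wrapped in a string literal above.
#subsets["v2/2021"] = Dataset(
# docs_v2,
# CastQueries(dlc["2021/queries"], Cast2021Query),
# TrecQrels(dlc["2021/qrels"], QRELS_DEFS),
#)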

# --- Version 3 (2022)
# https://github.com/daltonj/treccastweb#year-4-trec-2022
# Official documents = processed (split) WAPO 2020, KILT, MS MARCO v2

v3_dupes = dlc["v3/dupes"]
"""
docs_v3 = register_docs(
"v3",
False,
Expand Down Expand Up @@ -751,12 +754,13 @@ def register_docs(namespace: str, use_docs: bool, *tuples):
17_111_488,
),
)
"""

subsets["v3/2022"] = Dataset(
docs_v3,
CastQueries(dlc["2022/queries"], Cast2022Query),
TrecQrels(dlc["2022/qrels"], QRELS_DEFS),
)
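# NOTE: the "v3/2022" subset is disabled for now because the dataset is broken;
# its document collection (docs_v3) is wrapped in a string literal above.
#subsets["v3/2022"] = Dataset(
# docs_v3,
# CastQueries(dlc["2022/queries"], Cast2022Query),
# TrecQrels(dlc["2022/qrels"], QRELS_DEFS),
#)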

# --- Register all datasets
ir_datasets.registry.register(NAME, base)
Expand Down

0 comments on commit 74e40f5

Please sign in to comment.