Skip to content

Commit

Permalink
Add stuff to Corpus class
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyannn committed Dec 10, 2023
1 parent 3759e0d commit 1c14ea7
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions de_wiki_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,15 @@
class Corpus:
    """Bundle the chunk records together with their embeddings.

    Attributes:
        data: Mapping from chunk id to that chunk's record. ``format_chunk``
            reads the ``"title"`` and ``"text"`` keys of each record.
        embeddings: The embeddings object, stored exactly as passed in.
    """

    def __init__(
        self,
        # Generic parameters are comma-separated; the former ``dict[int:...]``
        # spelling built a slice object instead of a key/value type pair.
        data: dict[int, dict[str, str]],
        # String forward reference: the name is not evaluated when the
        # ``def`` statement runs.
        embeddings: "Embeddings",
    ):
        self.data = data
        self.embeddings = embeddings

    def format_chunk(self, chunk_id):
        """Return the chunk rendered as ``"<chunk_id> [<title>] <text>"``.

        Args:
            chunk_id: Key of the chunk in ``self.data``.

        Raises:
            KeyError: If ``chunk_id`` is not in ``self.data``, or its record
                has no ``"title"`` or ``"text"`` entry.
        """
        chunk = self.data[chunk_id]  # look the record up once
        return f"""{chunk_id} [{chunk["title"]}] {chunk["text"]}"""


def load_corpus() -> Corpus:
"""Load and cache the dataset and its embeddings."""
Expand Down Expand Up @@ -215,11 +218,6 @@ def get_context_ids(
def run_loop(llm: LLM, corpus: Corpus, question: str):
"""Run an interactive loop to test the context retrieval"""

data = corpus.data

def format_chunk(chunk_id):
return f"""{chunk_id} [{data[chunk_id]["title"]}] {data[chunk_id]["text"]}"""

while question:
logging.info("Answering '%s'", question)

Expand All @@ -228,13 +226,13 @@ def format_chunk(chunk_id):
if context_ids:
print("---- Accepted ----")
for cid in context_ids:
print(format_chunk(cid))
print(corpus.format_chunk(cid))

print("---- Rejected ----")
for cid in rejected_ids:
print(format_chunk(cid))
print(corpus.format_chunk(cid))

context = build_context(data[cid] for cid in context_ids)
context = build_context(corpus.data[cid] for cid in context_ids)

print("---- Without context ----")
print(llm.answer(question_prompt(question)))
Expand Down

0 comments on commit 1c14ea7

Please sign in to comment.