From 2e31347d6781d7ecac942e5c6e60ece4e7bd94d8 Mon Sep 17 00:00:00 2001
From: glemaitre To ease the deployment, we rely on Use the Ragger Duck library
Deploy Ragger Duck#
pixi
. Refer to following
-link for installing pixi
.pixi
but in short, for the
+currently supported platform, the following should be enough:
curl -fsSL https://pixi.sh/install.sh | bash
+
In the latest stage, pixi
will be in charge to create the Python environments to
build the scikit-learn documentation, train the retrievers, and launch the Web Console.
We already setup several environments for you depending on the platform and hardware
diff --git a/searchindex.js b/searchindex.js
index ceec4db..2aa28d6 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["about", "auto_examples/index", "index", "install", "references/embedding", "references/generated/ragger_duck.embedding.SentenceTransformer", "references/generated/ragger_duck.prompt.BasicPromptingStrategy", "references/generated/ragger_duck.retrieval.BM25Retriever", "references/generated/ragger_duck.retrieval.RetrieverReranker", "references/generated/ragger_duck.retrieval.SemanticRetriever", "references/generated/ragger_duck.scraping.APINumPyDocExtractor", "references/generated/ragger_duck.scraping.GalleryExampleExtractor", "references/generated/ragger_duck.scraping.UserGuideDocExtractor", "references/index", "references/prompt", "references/retrieval", "references/scraping", "user_guide/index", "user_guide/information_retrieval", "user_guide/large_language_model", "user_guide/text_scraping", "whats_new", "whats_new/v0.1"], "filenames": ["about.rst", "auto_examples/index.rst", "index.rst", "install.rst", "references/embedding.rst", "references/generated/ragger_duck.embedding.SentenceTransformer.rst", "references/generated/ragger_duck.prompt.BasicPromptingStrategy.rst", "references/generated/ragger_duck.retrieval.BM25Retriever.rst", "references/generated/ragger_duck.retrieval.RetrieverReranker.rst", "references/generated/ragger_duck.retrieval.SemanticRetriever.rst", "references/generated/ragger_duck.scraping.APINumPyDocExtractor.rst", "references/generated/ragger_duck.scraping.GalleryExampleExtractor.rst", "references/generated/ragger_duck.scraping.UserGuideDocExtractor.rst", "references/index.rst", "references/prompt.rst", "references/retrieval.rst", "references/scraping.rst", "user_guide/index.rst", "user_guide/information_retrieval.rst", "user_guide/large_language_model.rst", "user_guide/text_scraping.rst", "whats_new.rst", "whats_new/v0.1.rst"], "titles": ["About us", "Examples", "Ragger Duck documentation", "Getting Started", "Embedding", "SentenceTransformer", "BasicPromptingStrategy", "BM25Retriever", "RetrieverReranker", "SemanticRetriever", "APINumPyDocExtractor", "GalleryExampleExtractor", "UserGuideDocExtractor", "API reference", "Prompt", "Retrieval", "Scraping the documentation", "User guide", "Retriever", "Prompting", "Text Scraping", "Release history", "Version 0.1"], "terms": {"thi": [0, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20], "i": [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20], "sundai": [0, 3], "afternoon": [0, 3], "project": [0, 2, 18], "One": 1, "dai": 1, "write": 1, "some": [1, 2, 3, 17, 20], "right": [1, 8], "now": [1, 3], "someth": 1, "locat": 1, "script": 1, "folder": [1, 10, 11, 12], "date": 2, "apr": 2, "17": 2, "2024": 2, "version": [2, 3, 5, 10, 11, 12], "us": [2, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "link": [2, 3], "sourc": [2, 3, 10, 11, 12], "repositori": [2, 3, 5], "issu": 2, "idea": [2, 18], "sklearn": [2, 3, 7], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "provid": [2, 3, 18, 20], "set": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "tool": 2, "build": [2, 9, 18, 20], "rag": [2, 3, 17], "answer": [2, 6, 19], "question": [2, 19], "about": 2, "scikit": [2, 5, 9, 16], "learn": [2, 5, 9, 16], "librari": [2, 9], "get": [2, 5, 6, 7, 8, 9, 10, 11, 12, 19], "start": 2, "check": [2, 5, 6, 7, 8, 9, 10, 11, 12], "out": 2, "guid": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "instal": [2, 3], "extra": 2, "inform": [2, 5, 6, 7, 8, 9, 10, 11, 12, 20], "new": [2, 5, 10, 11, 12], "contribut": 2, "also": [2, 3], "To": [2, 3], "guidelin": 2, "user": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "depth": 2, "kei": [2, 5], "concept": 2, "background": 2, "explan": 2, "api": [2, 3, 5, 9, 10, 11, 12, 17], "refer": [2, 3, 20], "contain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "detail": [2, 3], "descript": 2, "avail": [2, 18], "exampl": [2, 3, 5, 10, 11, 12], "galleri": [2, 3, 11], "good": 2, "place": 2, "see": [2, 5, 10, 11, 12], "action": 2, "select": [2, 3], "an": [2, 3, 5, 6, 9, 10, 11, 12, 18], "dive": 2, "sinc": 3, "yet": 3, "wai": [3, 20], "packag": [3, 13], "easi": 3, "dirti": 3, "add": 3, "your": 3, "path": [3, 5, 10, 11, 12], "moment": 3, "import": 3, "sy": 3, "path_to_packag": 3, "ragger_duck": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16], "append": 3, "eas": 3, "deploy": 3, "we": [3, 6, 8, 9, 18, 19, 20], "reli": 3, "pixi": 3, "follow": [3, 5, 8, 9], "In": [3, 18, 21, 22], "latest": 3, "stage": 3, "charg": 3, "creat": [3, 5, 9], "python": 3, "environ": 3, "alreadi": 3, "setup": 3, "sever": [3, 8], "you": [3, 8], "depend": 3, "platform": 3, "hardwar": 3, "dispos": 3, "cpu": [3, 5, 11, 12], "cross": [3, 8, 18], "e": [3, 5], "linux": 3, "maco": 3, "x86_64": 3, "arm64": 3, "mp": [3, 5], "m1": 3, "m2": 3, "m3": 3, "chip": 3, "cuda": [3, 5], "12": 3, "1": [3, 5, 7, 9, 10, 11, 12], "machin": 3, "gpu": [3, 5], "support": 3, "make": [3, 18], "experi": 3, "scalewai": 3, "instanc": [3, 5, 6, 7, 8, 9, 10, 11, 12], "l4": 3, "11": 3, "7": 3, "similar": [3, 9, 18], "instead": 3, "note": [3, 8], "can": [3, 5, 8, 10, 12, 20], "modifi": 3, "toml": 3, "own": 3, "might": 3, "suit": 3, "need": [3, 18], "github": 3, "self": [3, 5, 6, 7, 8, 9, 10, 11, 12], "all": [3, 8], "necessari": 3, "file": 3, "recurs": 3, "submodul": 3, "git": 3, "com": 3, "glemaitr": 3, "first": [3, 5], "html": [3, 12], "gener": 3, "page": [3, 12], "run": [3, 11, 12], "command": 3, "frozen": 3, "doc": 3, "have": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "differ": [3, 20], "each": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "type": [3, 18], "more": [3, 18, 20], "strategi": [3, 6, 19], "propos": 3, "specif": [3, 20], "specifi": 3, "llm": [3, 6, 19], "For": 3, "test": 3, "purpos": 3, "mistral": 3, "7b": 3, "fetch": 3, "ar": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "Then": [3, 18], "requir": 3, "which": 3, "want": [3, 8], "offload": 3, "access": 3, "address": 3, "http": 3, "localhost": 3, "8123": 3, "modul": [4, 5, 16, 20], "function": [4, 10, 12, 16, 20], "emb": [4, 5, 9], "transform": [4, 5, 7, 9, 10, 11, 12, 18], "allow": [4, 6], "text": [4, 5, 7, 10, 11, 12, 17], "class": [5, 6, 7, 8, 9, 10, 11, 12, 20], "embed": [5, 7, 9, 13, 18], "model_name_or_path": 5, "none": [5, 7, 8, 9, 10, 11, 12], "devic": 5, "cache_fold": 5, "use_auth_token": 5, "batch_siz": 5, "32": 5, "show_progress_bar": 5, "true": [5, 6, 7, 8, 9, 10, 11, 12], "sentenc": [5, 9, 18], "thin": 5, "wrapper": 5, "around": 5, "sentence_transform": [5, 8], "thu": 5, "insid": 5, "pipelin": [5, 6, 7, 8, 9, 10, 11, 12], "paramet": [5, 6, 7, 8, 9, 10, 11, 12, 20], "str": [5, 7, 8, 9, 12], "default": [5, 6, 7, 8, 9, 10, 11, 12], "If": [5, 6, 7, 8, 9, 10, 11, 12, 18], "filepath": 5, "disc": 5, "load": 5, "model": [5, 6, 18, 19], "from": [5, 7, 8, 9, 10, 11, 12, 18, 19, 20], "tri": 5, "download": [5, 18], "pre": [5, 18], "train": [5, 7, 8, 9, 18], "fail": 5, "construct": 5, "huggingfac": [5, 18], "name": [5, 6, 7, 8, 9, 10, 11, 12, 18], "iter": 5, "nn": 5, "custom": 5, "scratch": 5, "g": 5, "should": [5, 6], "comput": [5, 7, 8, 9], "store": 5, "bool": [5, 6, 7, 8, 9, 10, 11, 12], "authent": 5, "token": 5, "privat": 5, "int": [5, 7, 8, 9, 11, 12], "batch": 5, "size": [5, 11, 12, 20], "dure": [5, 18], "whether": [5, 6, 8], "show": 5, "progress": [5, 21, 22], "bar": 5, "method": [5, 6, 7, 8, 9, 10, 11, 12], "fit": [5, 7, 8, 9, 10, 11, 12], "x": [5, 7, 8, 9, 10, 11, 12], "y": [5, 7, 8, 9, 10, 11, 12], "No": [5, 10, 11, 12], "op": [5, 10, 11, 12], "oper": [5, 10, 11, 12], "onli": [5, 10, 11, 12, 20], "valid": [5, 10, 11, 12], "ignor": [5, 7, 8, 9, 10, 11, 12], "return": [5, 6, 7, 8, 9, 10, 11, 12], "estim": [5, 6, 7, 8, 9, 10, 11, 12, 20], "fit_transform": [5, 10, 11, 12], "fit_param": [5, 10, 11, 12], "data": [5, 7, 8, 9, 10, 11, 12], "option": [5, 10, 11, 12], "arrai": [5, 10, 11, 12], "like": [5, 10, 11, 12], "shape": [5, 7, 9, 10, 11, 12], "n_sampl": [5, 10, 11, 12], "n_featur": [5, 7, 9, 10, 11, 12], "input": [5, 7, 8, 9, 10, 11, 12], "sampl": [5, 10, 11, 12], "n_output": [5, 10, 11, 12], "target": [5, 10, 11, 12], "valu": [5, 6, 7, 8, 9, 10, 11, 12], "unsupervis": [5, 10, 11, 12], "dict": [5, 6, 7, 8, 9, 10, 11, 12], "addit": [5, 10, 11, 12], "x_new": [5, 10, 11, 12], "ndarrai": [5, 7, 9, 10, 11, 12], "n_features_new": [5, 10, 11, 12], "get_metadata_rout": [5, 6, 7, 8, 9, 10, 11, 12], "metadata": [5, 6, 7, 8, 9, 10, 11, 12], "rout": [5, 6, 7, 8, 9, 10, 11, 12], "object": [5, 6, 7, 8, 9, 10, 11, 12], "pleas": [5, 6, 7, 8, 9, 10, 11, 12], "how": [5, 6, 7, 8, 9, 10, 11, 12], "mechan": [5, 6, 7, 8, 9, 10, 11, 12], "work": [5, 6, 7, 8, 9, 10, 11, 12], "metadatarequest": [5, 6, 7, 8, 9, 10, 11, 12], "A": [5, 6, 7, 8, 9, 10, 11, 12], "encapsul": [5, 6, 7, 8, 9, 10, 11, 12], "get_param": [5, 6, 7, 8, 9, 10, 11, 12], "deep": [5, 6, 7, 8, 9, 10, 11, 12], "subobject": [5, 6, 7, 8, 9, 10, 11, 12], "param": [5, 6, 7, 8, 9, 10, 11, 12], "map": [5, 6, 7, 8, 9, 10, 11, 12], "set_output": [5, 10, 11, 12], "output": [5, 10, 11, 12], "sphx_glr_auto_examples_miscellaneous_plot_set_output": [5, 10, 11, 12], "py": [5, 10, 11, 12], "panda": [5, 10, 11, 12], "configur": [5, 10, 11, 12], "format": [5, 10, 11, 12], "datafram": [5, 10, 11, 12], "polar": [5, 10, 11, 12], "unchang": [5, 10, 11, 12], "4": [5, 10, 11, 12], "wa": [5, 10, 11, 12], "ad": [5, 10, 11, 12], "set_param": [5, 6, 7, 8, 9, 10, 11, 12], "simpl": [5, 6, 7, 8, 9, 10, 11, 12, 20], "well": [5, 6, 7, 8, 9, 10, 11, 12], "nest": [5, 6, 7, 8, 9, 10, 11, 12], "latter": [5, 6, 7, 8, 9, 10, 11, 12], "form": [5, 6, 7, 8, 9, 10, 11, 12], "compon": [5, 6, 7, 8, 9, 10, 11, 12], "__": [5, 6, 7, 8, 9, 10, 11, 12], "so": [5, 6, 7, 8, 9, 10, 11, 12], "": [5, 6, 7, 8, 9, 10, 11, 12], "possibl": [5, 6, 7, 8, 9, 10, 11, 12], "updat": [5, 6, 7, 8, 9, 10, 11, 12], "vector": [5, 7, 18], "length": 5, "n_sentenc": [5, 7, 9], "singl": 5, "list": [5, 7, 8, 9, 10, 11, 12], "dictionari": [5, 10, 11, 12], "embedding_s": 5, "prompt": [6, 13, 17], "retriev": [6, 7, 8, 9, 13, 17, 19], "use_retrieved_context": 6, "queri": [6, 7, 8, 9, 18], "onc": [6, 19], "context": [6, 8, 18, 19], "request": [6, 19], "languag": [6, 19], "expect": [6, 8, 9], "implement": [6, 8, 18, 19], "__call__": 6, "take": 6, "respons": 6, "It": [6, 10, 20], "instruct": 6, "base": [6, 7, 8], "befor": 6, "count_vector": 7, "top_k": [7, 9], "b": 7, "0": 7, "75": 7, "k1": 7, "6": 7, "k": [7, 9], "nearest": [7, 9, 18], "neighbor": [7, 9, 18], "lexic": [7, 8, 17, 19], "search": [7, 19], "bm25": 7, "count": 7, "term": 7, "document": [7, 8, 9, 10, 11, 12, 13, 17, 18], "feature_extract": 7, "countvector": [7, 18], "number": [7, 8, 9, 11, 12], "attribut": [7, 9, 11, 12], "x_fit_": [7, 9], "x_embedded_": [7, 9], "vocabulari": [7, 8, 18], "idf": [7, 8, 18], "most": [7, 8, 9], "relev": [7, 8, 9], "cross_encod": 8, "min_top_k": 8, "max_top_k": 8, "threshold": 8, "drop_dupl": 8, "hybrid": 8, "semant": [8, 9, 17, 19], "encod": [8, 18], "rerank": [8, 17, 19], "accept": 8, "case": [8, 18], "result": [8, 18], "crossencod": 8, "minimum": 8, "less": 8, "than": 8, "maximum": 8, "float": 8, "filter": 8, "score": [8, 18], "drop": 8, "duplic": 8, "step": 8, "done": 8, "after": 8, "index": 9, "faiss": [9, 18], "inner": 9, "product": 9, "cosin": 9, "mean": 9, "normal": 9, "scrape": [10, 11, 12, 13, 17], "extract": [10, 11, 12, 20], "numpydoc": [10, 20], "process": [10, 12], "templat": 10, "pathlib": [10, 11, 12], "chunk_siz": [11, 12], "300": [11, 12], "chunk_overlap": [11, 12], "50": [11, 12], "n_job": [11, 12], "chunk": [11, 12, 18, 20], "split": [11, 12], "overlap": [11, 12], "between": [11, 12, 18], "two": [11, 12, 18], "consecut": [11, 12], "job": [11, 12], "parallel": [11, 12], "core": [11, 12], "text_splitter_": [11, 12], "langchain": [11, 12], "text_splitt": [11, 12], "recursivecharactertextsplitt": [11, 12], "splitter": [11, 12], "folders_to_exclud": 12, "string": 12, "correspond": 12, "exclud": 12, "full": 13, "apinumpydocextractor": [13, 20], "galleryexampleextractor": 13, "userguidedocextractor": [13, 20], "sentencetransform": [13, 18], "bm25retriev": [13, 18], "retrieverrerank": [13, 18], "semanticretriev": [13, 18], "basicpromptingstrategi": [13, 19], "websit": [16, 20], "info": 17, "regard": 17, "merg": 17, "differenti": 18, "exact": 18, "match": 18, "tf": 18, "weight": 18, "scheme": 18, "seen": 18, "flexibl": 18, "space": 18, "databas": 18, "closest": 18, "given": 18, "approxim": 18, "algorithm": 18, "As": 18, "ani": [18, 20], "both": 18, "our": 18, "microsoft": 18, "bing": 18, "pair": 18, "larg": 19, "meaning": 20, "advanc": 20, "scraper": 20, "inde": 20, "pars": 20, "section": 20, "while": 20, "don": 20, "t": 20, "control": 20, "thei": 20, "alwai": 20, "hope": 20, "remov": 20, "ambigu": 20, "could": 20, "exist": 20, "when": 20, "without": 20, "webpag": 20, "addition": 20}, "objects": {"ragger_duck": [[4, 0, 0, "-", "embedding"], [14, 0, 0, "-", "prompt"], [15, 0, 0, "-", "retrieval"], [16, 0, 0, "-", "scraping"]], "ragger_duck.embedding": [[5, 1, 1, "", "SentenceTransformer"]], "ragger_duck.embedding.SentenceTransformer": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "fit_transform"], [5, 2, 1, "", "get_metadata_routing"], [5, 2, 1, "", "get_params"], [5, 2, 1, "", "set_output"], [5, 2, 1, "", "set_params"], [5, 2, 1, "", "transform"]], "ragger_duck.prompt": [[6, 1, 1, "", "BasicPromptingStrategy"]], "ragger_duck.prompt.BasicPromptingStrategy": [[6, 2, 1, "", "get_metadata_routing"], [6, 2, 1, "", "get_params"], [6, 2, 1, "", "set_params"]], "ragger_duck.retrieval": [[7, 1, 1, "", "BM25Retriever"], [8, 1, 1, "", "RetrieverReranker"], [9, 1, 1, "", "SemanticRetriever"]], "ragger_duck.retrieval.BM25Retriever": [[7, 2, 1, "", "fit"], [7, 2, 1, "", "get_metadata_routing"], [7, 2, 1, "", "get_params"], [7, 2, 1, "", "query"], [7, 2, 1, "", "set_params"]], "ragger_duck.retrieval.RetrieverReranker": [[8, 2, 1, "", "fit"], [8, 2, 1, "", "get_metadata_routing"], [8, 2, 1, "", "get_params"], [8, 2, 1, "", "query"], [8, 2, 1, "", "set_params"]], "ragger_duck.retrieval.SemanticRetriever": [[9, 2, 1, "", "fit"], [9, 2, 1, "", "get_metadata_routing"], [9, 2, 1, "", "get_params"], [9, 2, 1, "", "query"], [9, 2, 1, "", "set_params"]], "ragger_duck.scraping": [[10, 1, 1, "", "APINumPyDocExtractor"], [11, 1, 1, "", "GalleryExampleExtractor"], [12, 1, 1, "", "UserGuideDocExtractor"]], "ragger_duck.scraping.APINumPyDocExtractor": [[10, 2, 1, "", "fit"], [10, 2, 1, "", "fit_transform"], [10, 2, 1, "", "get_metadata_routing"], [10, 2, 1, "", "get_params"], [10, 2, 1, "", "set_output"], [10, 2, 1, "", "set_params"], [10, 2, 1, "", "transform"]], "ragger_duck.scraping.GalleryExampleExtractor": [[11, 2, 1, "", "fit"], [11, 2, 1, "", "fit_transform"], [11, 2, 1, "", "get_metadata_routing"], [11, 2, 1, "", "get_params"], [11, 2, 1, "", "set_output"], [11, 2, 1, "", "set_params"], [11, 2, 1, "", "transform"]], "ragger_duck.scraping.UserGuideDocExtractor": [[12, 2, 1, "", "fit"], [12, 2, 1, "", "fit_transform"], [12, 2, 1, "", "get_metadata_routing"], [12, 2, 1, "", "get_params"], [12, 2, 1, "", "set_output"], [12, 2, 1, "", "set_params"], [12, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"]}, "titleterms": {"about": 0, "u": 0, "exampl": 1, "ragger": [2, 3], "duck": [2, 3], "document": [2, 3, 16, 19, 20], "get": 3, "start": 3, "us": 3, "librari": 3, "deploi": 3, "clone": 3, "project": 3, "build": 3, "scikit": 3, "learn": 3, "train": 3, "semant": [3, 18], "lexic": [3, 18], "retriev": [3, 15, 18], "download": 3, "larg": 3, "languag": 3, "model": 3, "launch": 3, "web": 3, "consol": 3, "embed": 4, "sentencetransform": 5, "basicpromptingstrategi": 6, "bm25retriev": 7, "retrieverrerank": 8, "semanticretriev": 9, "apinumpydocextractor": 10, "galleryexampleextractor": 11, "userguidedocextractor": 12, "api": [13, 19, 20], "refer": 13, "prompt": [14, 19], "scrape": [16, 20], "user": [17, 20], "guid": [17, 20], "implement": 17, "detail": 17, "rerank": 18, "merg": 18, "text": 20, "releas": 21, "histori": 21, "version": [21, 22], "0": [21, 22], "1": [21, 22], "changelog": [21, 22]}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"About us": [[0, "about-us"]], "Examples": [[1, "examples"]], "Ragger Duck documentation": [[2, "ragger-duck-documentation"]], "Getting Started": [[3, "getting-started"]], "Use the Ragger Duck library": [[3, "use-the-ragger-duck-library"]], "Deploy Ragger Duck": [[3, "deploy-ragger-duck"]], "Cloning the project": [[3, "cloning-the-project"]], "Build the scikit-learn documentation": [[3, "build-the-scikit-learn-documentation"]], "Train the semantic and lexical retrievers": [[3, "train-the-semantic-and-lexical-retrievers"]], "Download the Large Language Model": [[3, "download-the-large-language-model"]], "Launch the Web Console": [[3, "launch-the-web-console"]], "Embedding": [[4, "module-ragger_duck.embedding"]], "SentenceTransformer": [[5, "sentencetransformer"]], "BasicPromptingStrategy": [[6, "basicpromptingstrategy"]], "BM25Retriever": [[7, "bm25retriever"]], "RetrieverReranker": [[8, "retrieverreranker"]], "SemanticRetriever": [[9, "semanticretriever"]], "APINumPyDocExtractor": [[10, "apinumpydocextractor"]], "GalleryExampleExtractor": [[11, "galleryexampleextractor"]], "UserGuideDocExtractor": [[12, "userguidedocextractor"]], "API reference": [[13, "api-reference"]], "Prompt": [[14, "module-ragger_duck.prompt"]], "Retrieval": [[15, "module-ragger_duck.retrieval"]], "Scraping the documentation": [[16, "module-ragger_duck.scraping"]], "User Guide": [[17, "user-guide"]], "Implementation details": [[17, "implementation-details"]], "Retriever": [[18, "retriever"]], "Lexical retrievers": [[18, "lexical-retrievers"]], "Semantical retrievers": [[18, "semantical-retrievers"]], "Reranker: merging lexical and semantical retrievers": [[18, "reranker-merging-lexical-and-semantical-retrievers"]], "Prompting": [[19, "prompting"]], "Prompting for API documentation": [[19, "prompting-for-api-documentation"]], "Text Scraping": [[20, "text-scraping"]], "API documentation": [[20, "api-documentation"]], "User Guide documentation": [[20, "user-guide-documentation"]], "Release history": [[21, "release-history"]], "Version 0.1": [[21, "version-0-1"], [22, "version-0-1"]], "Changelog": [[21, "changelog"], [22, "changelog"]]}, "indexentries": {"module": [[4, "module-ragger_duck.embedding"], [14, "module-ragger_duck.prompt"], [15, "module-ragger_duck.retrieval"], [16, "module-ragger_duck.scraping"]], "ragger_duck.embedding": [[4, "module-ragger_duck.embedding"]], "sentencetransformer (class in ragger_duck.embedding)": [[5, "ragger_duck.embedding.SentenceTransformer"]], "fit() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit"]], "fit_transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit_transform"]], "get_metadata_routing() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_metadata_routing"]], "get_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_params"]], "set_output() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_output"]], "set_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_params"]], "transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.transform"]], "basicpromptingstrategy (class in ragger_duck.prompt)": [[6, "ragger_duck.prompt.BasicPromptingStrategy"]], "get_metadata_routing() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_metadata_routing"]], "get_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_params"]], "set_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.set_params"]], "bm25retriever (class in ragger_duck.retrieval)": [[7, "ragger_duck.retrieval.BM25Retriever"]], "fit() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_params"]], "query() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.query"]], "set_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.set_params"]], "retrieverreranker (class in ragger_duck.retrieval)": [[8, "ragger_duck.retrieval.RetrieverReranker"]], "fit() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.fit"]], "get_metadata_routing() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_params"]], "query() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.query"]], "set_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.set_params"]], "semanticretriever (class in ragger_duck.retrieval)": [[9, "ragger_duck.retrieval.SemanticRetriever"]], "fit() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_params"]], "query() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.query"]], "set_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.set_params"]], "apinumpydocextractor (class in ragger_duck.scraping)": [[10, "ragger_duck.scraping.APINumPyDocExtractor"]], "fit() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_params"]], "transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.transform"]], "galleryexampleextractor (class in ragger_duck.scraping)": [[11, "ragger_duck.scraping.GalleryExampleExtractor"]], "fit() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit"]], "fit_transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_params"]], "set_output() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_output"]], "set_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_params"]], "transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.transform"]], "userguidedocextractor (class in ragger_duck.scraping)": [[12, "ragger_duck.scraping.UserGuideDocExtractor"]], "fit() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_params"]], "transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.transform"]], "ragger_duck.prompt": [[14, "module-ragger_duck.prompt"]], "ragger_duck.retrieval": [[15, "module-ragger_duck.retrieval"]], "ragger_duck.scraping": [[16, "module-ragger_duck.scraping"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["about", "auto_examples/index", "index", "install", "references/embedding", "references/generated/ragger_duck.embedding.SentenceTransformer", "references/generated/ragger_duck.prompt.BasicPromptingStrategy", "references/generated/ragger_duck.retrieval.BM25Retriever", "references/generated/ragger_duck.retrieval.RetrieverReranker", "references/generated/ragger_duck.retrieval.SemanticRetriever", "references/generated/ragger_duck.scraping.APINumPyDocExtractor", "references/generated/ragger_duck.scraping.GalleryExampleExtractor", "references/generated/ragger_duck.scraping.UserGuideDocExtractor", "references/index", "references/prompt", "references/retrieval", "references/scraping", "user_guide/index", "user_guide/information_retrieval", "user_guide/large_language_model", "user_guide/text_scraping", "whats_new", "whats_new/v0.1"], "filenames": ["about.rst", "auto_examples/index.rst", "index.rst", "install.rst", "references/embedding.rst", "references/generated/ragger_duck.embedding.SentenceTransformer.rst", "references/generated/ragger_duck.prompt.BasicPromptingStrategy.rst", "references/generated/ragger_duck.retrieval.BM25Retriever.rst", "references/generated/ragger_duck.retrieval.RetrieverReranker.rst", "references/generated/ragger_duck.retrieval.SemanticRetriever.rst", "references/generated/ragger_duck.scraping.APINumPyDocExtractor.rst", "references/generated/ragger_duck.scraping.GalleryExampleExtractor.rst", "references/generated/ragger_duck.scraping.UserGuideDocExtractor.rst", "references/index.rst", "references/prompt.rst", "references/retrieval.rst", "references/scraping.rst", "user_guide/index.rst", "user_guide/information_retrieval.rst", "user_guide/large_language_model.rst", "user_guide/text_scraping.rst", "whats_new.rst", "whats_new/v0.1.rst"], "titles": ["About us", "Examples", "Ragger Duck documentation", "Getting Started", "Embedding", "SentenceTransformer", "BasicPromptingStrategy", "BM25Retriever", "RetrieverReranker", "SemanticRetriever", "APINumPyDocExtractor", "GalleryExampleExtractor", "UserGuideDocExtractor", "API reference", "Prompt", "Retrieval", "Scraping the documentation", "User guide", "Retriever", "Prompting", "Text Scraping", "Release history", "Version 0.1"], "terms": {"thi": [0, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20], "i": [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20], "sundai": [0, 3], "afternoon": [0, 3], "project": [0, 2, 18], "One": 1, "dai": 1, "write": 1, "some": [1, 2, 3, 17, 20], "right": [1, 8], "now": [1, 3], "someth": 1, "locat": 1, "script": 1, "folder": [1, 10, 11, 12], "date": 2, "apr": 2, "17": 2, "2024": 2, "version": [2, 3, 5, 10, 11, 12], "us": [2, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "link": [2, 3], "sourc": [2, 3, 10, 11, 12], "repositori": [2, 3, 5], "issu": 2, "idea": [2, 18], "sklearn": [2, 3, 7], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "provid": [2, 3, 18, 20], "set": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "tool": 2, "build": [2, 9, 18, 20], "rag": [2, 3, 17], "answer": [2, 6, 19], "question": [2, 19], "about": 2, "scikit": [2, 5, 9, 16], "learn": [2, 5, 9, 16], "librari": [2, 9], "get": [2, 5, 6, 7, 8, 9, 10, 11, 12, 19], "start": 2, "check": [2, 5, 6, 7, 8, 9, 10, 11, 12], "out": 2, "guid": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "instal": [2, 3], "extra": 2, "inform": [2, 5, 6, 7, 8, 9, 10, 11, 12, 20], "new": [2, 5, 10, 11, 12], "contribut": 2, "also": [2, 3], "To": [2, 3], "guidelin": 2, "user": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "depth": 2, "kei": [2, 5], "concept": 2, "background": 2, "explan": 2, "api": [2, 3, 5, 9, 10, 11, 12, 17], "refer": [2, 3, 20], "contain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "detail": [2, 3], "descript": 2, "avail": [2, 18], "exampl": [2, 3, 5, 10, 11, 12], "galleri": [2, 3, 11], "good": 2, "place": 2, "see": [2, 5, 10, 11, 12], "action": 2, "select": [2, 3], "an": [2, 3, 5, 6, 9, 10, 11, 12, 18], "dive": 2, "sinc": 3, "yet": 3, "wai": [3, 20], "packag": [3, 13], "easi": 3, "dirti": 3, "add": 3, "your": 3, "path": [3, 5, 10, 11, 12], "moment": 3, "import": 3, "sy": 3, "path_to_packag": 3, "ragger_duck": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16], "append": 3, "eas": 3, "deploy": 3, "we": [3, 6, 8, 9, 18, 19, 20], "reli": 3, "pixi": 3, "follow": [3, 5, 8, 9], "short": 3, "current": 3, "support": 3, "platform": 3, "should": [3, 5, 6], "enough": 3, "curl": 3, "fssl": 3, "http": 3, "sh": 3, "bash": 3, "In": [3, 18, 21, 22], "latest": 3, "stage": 3, "charg": 3, "creat": [3, 5, 9], "python": 3, "environ": 3, "alreadi": 3, "setup": 3, "sever": [3, 8], "you": [3, 8], "depend": 3, "hardwar": 3, "dispos": 3, "cpu": [3, 5, 11, 12], "cross": [3, 8, 18], "e": [3, 5], "linux": 3, "maco": 3, "x86_64": 3, "arm64": 3, "mp": [3, 5], "m1": 3, "m2": 3, "m3": 3, "chip": 3, "cuda": [3, 5], "12": 3, "1": [3, 5, 7, 9, 10, 11, 12], "machin": 3, "gpu": [3, 5], "make": [3, 18], "experi": 3, "scalewai": 3, "instanc": [3, 5, 6, 7, 8, 9, 10, 11, 12], "l4": 3, "11": 3, "7": 3, "similar": [3, 9, 18], "instead": 3, "note": [3, 8], "can": [3, 5, 8, 10, 12, 20], "modifi": 3, "toml": 3, "own": 3, "might": 3, "suit": 3, "need": [3, 18], "github": 3, "self": [3, 5, 6, 7, 8, 9, 10, 11, 12], "all": [3, 8], "necessari": 3, "file": 3, "recurs": 3, "submodul": 3, "git": 3, "com": 3, "glemaitr": 3, "first": [3, 5], "html": [3, 12], "gener": 3, "page": [3, 12], "run": [3, 11, 12], "command": 3, "frozen": 3, "doc": 3, "have": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "differ": [3, 20], "each": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "type": [3, 18], "more": [3, 18, 20], "strategi": [3, 6, 19], "propos": 3, "specif": [3, 20], "specifi": 3, "llm": [3, 6, 19], "For": 3, "test": 3, "purpos": 3, "mistral": 3, "7b": 3, "fetch": 3, "ar": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "Then": [3, 18], "requir": 3, "which": 3, "want": [3, 8], "offload": 3, "access": 3, "address": 3, "localhost": 3, "8123": 3, "modul": [4, 5, 16, 20], "function": [4, 10, 12, 16, 20], "emb": [4, 5, 9], "transform": [4, 5, 7, 9, 10, 11, 12, 18], "allow": [4, 6], "text": [4, 5, 7, 10, 11, 12, 17], "class": [5, 6, 7, 8, 9, 10, 11, 12, 20], "embed": [5, 7, 9, 13, 18], "model_name_or_path": 5, "none": [5, 7, 8, 9, 10, 11, 12], "devic": 5, "cache_fold": 5, "use_auth_token": 5, "batch_siz": 5, "32": 5, "show_progress_bar": 5, "true": [5, 6, 7, 8, 9, 10, 11, 12], "sentenc": [5, 9, 18], "thin": 5, "wrapper": 5, "around": 5, "sentence_transform": [5, 8], "thu": 5, "insid": 5, "pipelin": [5, 6, 7, 8, 9, 10, 11, 12], "paramet": [5, 6, 7, 8, 9, 10, 11, 12, 20], "str": [5, 7, 8, 9, 12], "default": [5, 6, 7, 8, 9, 10, 11, 12], "If": [5, 6, 7, 8, 9, 10, 11, 12, 18], "filepath": 5, "disc": 5, "load": 5, "model": [5, 6, 18, 19], "from": [5, 7, 8, 9, 10, 11, 12, 18, 19, 20], "tri": 5, "download": [5, 18], "pre": [5, 18], "train": [5, 7, 8, 9, 18], "fail": 5, "construct": 5, "huggingfac": [5, 18], "name": [5, 6, 7, 8, 9, 10, 11, 12, 18], "iter": 5, "nn": 5, "custom": 5, "scratch": 5, "g": 5, "comput": [5, 7, 8, 9], "store": 5, "bool": [5, 6, 7, 8, 9, 10, 11, 12], "authent": 5, "token": 5, "privat": 5, "int": [5, 7, 8, 9, 11, 12], "batch": 5, "size": [5, 11, 12, 20], "dure": [5, 18], "whether": [5, 6, 8], "show": 5, "progress": [5, 21, 22], "bar": 5, "method": [5, 6, 7, 8, 9, 10, 11, 12], "fit": [5, 7, 8, 9, 10, 11, 12], "x": [5, 7, 8, 9, 10, 11, 12], "y": [5, 7, 8, 9, 10, 11, 12], "No": [5, 10, 11, 12], "op": [5, 10, 11, 12], "oper": [5, 10, 11, 12], "onli": [5, 10, 11, 12, 20], "valid": [5, 10, 11, 12], "ignor": [5, 7, 8, 9, 10, 11, 12], "return": [5, 6, 7, 8, 9, 10, 11, 12], "estim": [5, 6, 7, 8, 9, 10, 11, 12, 20], "fit_transform": [5, 10, 11, 12], "fit_param": [5, 10, 11, 12], "data": [5, 7, 8, 9, 10, 11, 12], "option": [5, 10, 11, 12], "arrai": [5, 10, 11, 12], "like": [5, 10, 11, 12], "shape": [5, 7, 9, 10, 11, 12], "n_sampl": [5, 10, 11, 12], "n_featur": [5, 7, 9, 10, 11, 12], "input": [5, 7, 8, 9, 10, 11, 12], "sampl": [5, 10, 11, 12], "n_output": [5, 10, 11, 12], "target": [5, 10, 11, 12], "valu": [5, 6, 7, 8, 9, 10, 11, 12], "unsupervis": [5, 10, 11, 12], "dict": [5, 6, 7, 8, 9, 10, 11, 12], "addit": [5, 10, 11, 12], "x_new": [5, 10, 11, 12], "ndarrai": [5, 7, 9, 10, 11, 12], "n_features_new": [5, 10, 11, 12], "get_metadata_rout": [5, 6, 7, 8, 9, 10, 11, 12], "metadata": [5, 6, 7, 8, 9, 10, 11, 12], "rout": [5, 6, 7, 8, 9, 10, 11, 12], "object": [5, 6, 7, 8, 9, 10, 11, 12], "pleas": [5, 6, 7, 8, 9, 10, 11, 12], "how": [5, 6, 7, 8, 9, 10, 11, 12], "mechan": [5, 6, 7, 8, 9, 10, 11, 12], "work": [5, 6, 7, 8, 9, 10, 11, 12], "metadatarequest": [5, 6, 7, 8, 9, 10, 11, 12], "A": [5, 6, 7, 8, 9, 10, 11, 12], "encapsul": [5, 6, 7, 8, 9, 10, 11, 12], "get_param": [5, 6, 7, 8, 9, 10, 11, 12], "deep": [5, 6, 7, 8, 9, 10, 11, 12], "subobject": [5, 6, 7, 8, 9, 10, 11, 12], "param": [5, 6, 7, 8, 9, 10, 11, 12], "map": [5, 6, 7, 8, 9, 10, 11, 12], "set_output": [5, 10, 11, 12], "output": [5, 10, 11, 12], "sphx_glr_auto_examples_miscellaneous_plot_set_output": [5, 10, 11, 12], "py": [5, 10, 11, 12], "panda": [5, 10, 11, 12], "configur": [5, 10, 11, 12], "format": [5, 10, 11, 12], "datafram": [5, 10, 11, 12], "polar": [5, 10, 11, 12], "unchang": [5, 10, 11, 12], "4": [5, 10, 11, 12], "wa": [5, 10, 11, 12], "ad": [5, 10, 11, 12], "set_param": [5, 6, 7, 8, 9, 10, 11, 12], "simpl": [5, 6, 7, 8, 9, 10, 11, 12, 20], "well": [5, 6, 7, 8, 9, 10, 11, 12], "nest": [5, 6, 7, 8, 9, 10, 11, 12], "latter": [5, 6, 7, 8, 9, 10, 11, 12], "form": [5, 6, 7, 8, 9, 10, 11, 12], "compon": [5, 6, 7, 8, 9, 10, 11, 12], "__": [5, 6, 7, 8, 9, 10, 11, 12], "so": [5, 6, 7, 8, 9, 10, 11, 12], "": [5, 6, 7, 8, 9, 10, 11, 12], "possibl": [5, 6, 7, 8, 9, 10, 11, 12], "updat": [5, 6, 7, 8, 9, 10, 11, 12], "vector": [5, 7, 18], "length": 5, "n_sentenc": [5, 7, 9], "singl": 5, "list": [5, 7, 8, 9, 10, 11, 12], "dictionari": [5, 10, 11, 12], "embedding_s": 5, "prompt": [6, 13, 17], "retriev": [6, 7, 8, 9, 13, 17, 19], "use_retrieved_context": 6, "queri": [6, 7, 8, 9, 18], "onc": [6, 19], "context": [6, 8, 18, 19], "request": [6, 19], "languag": [6, 19], "expect": [6, 8, 9], "implement": [6, 8, 18, 19], "__call__": 6, "take": 6, "respons": 6, "It": [6, 10, 20], "instruct": 6, "base": [6, 7, 8], "befor": 6, "count_vector": 7, "top_k": [7, 9], "b": 7, "0": 7, "75": 7, "k1": 7, "6": 7, "k": [7, 9], "nearest": [7, 9, 18], "neighbor": [7, 9, 18], "lexic": [7, 8, 17, 19], "search": [7, 19], "bm25": 7, "count": 7, "term": 7, "document": [7, 8, 9, 10, 11, 12, 13, 17, 18], "feature_extract": 7, "countvector": [7, 18], "number": [7, 8, 9, 11, 12], "attribut": [7, 9, 11, 12], "x_fit_": [7, 9], "x_embedded_": [7, 9], "vocabulari": [7, 8, 18], "idf": [7, 8, 18], "most": [7, 8, 9], "relev": [7, 8, 9], "cross_encod": 8, "min_top_k": 8, "max_top_k": 8, "threshold": 8, "drop_dupl": 8, "hybrid": 8, "semant": [8, 9, 17, 19], "encod": [8, 18], "rerank": [8, 17, 19], "accept": 8, "case": [8, 18], "result": [8, 18], "crossencod": 8, "minimum": 8, "less": 8, "than": 8, "maximum": 8, "float": 8, "filter": 8, "score": [8, 18], "drop": 8, "duplic": 8, "step": 8, "done": 8, "after": 8, "index": 9, "faiss": [9, 18], "inner": 9, "product": 9, "cosin": 9, "mean": 9, "normal": 9, "scrape": [10, 11, 12, 13, 17], "extract": [10, 11, 12, 20], "numpydoc": [10, 20], "process": [10, 12], "templat": 10, "pathlib": [10, 11, 12], "chunk_siz": [11, 12], "300": [11, 12], "chunk_overlap": [11, 12], "50": [11, 12], "n_job": [11, 12], "chunk": [11, 12, 18, 20], "split": [11, 12], "overlap": [11, 12], "between": [11, 12, 18], "two": [11, 12, 18], "consecut": [11, 12], "job": [11, 12], "parallel": [11, 12], "core": [11, 12], "text_splitter_": [11, 12], "langchain": [11, 12], "text_splitt": [11, 12], "recursivecharactertextsplitt": [11, 12], "splitter": [11, 12], "folders_to_exclud": 12, "string": 12, "correspond": 12, "exclud": 12, "full": 13, "apinumpydocextractor": [13, 20], "galleryexampleextractor": 13, "userguidedocextractor": [13, 20], "sentencetransform": [13, 18], "bm25retriev": [13, 18], "retrieverrerank": [13, 18], "semanticretriev": [13, 18], "basicpromptingstrategi": [13, 19], "websit": [16, 20], "info": 17, "regard": 17, "merg": 17, "differenti": 18, "exact": 18, "match": 18, "tf": 18, "weight": 18, "scheme": 18, "seen": 18, "flexibl": 18, "space": 18, "databas": 18, "closest": 18, "given": 18, "approxim": 18, "algorithm": 18, "As": 18, "ani": [18, 20], "both": 18, "our": 18, "microsoft": 18, "bing": 18, "pair": 18, "larg": 19, "meaning": 20, "advanc": 20, "scraper": 20, "inde": 20, "pars": 20, "section": 20, "while": 20, "don": 20, "t": 20, "control": 20, "thei": 20, "alwai": 20, "hope": 20, "remov": 20, "ambigu": 20, "could": 20, "exist": 20, "when": 20, "without": 20, "webpag": 20, "addition": 20}, "objects": {"ragger_duck": [[4, 0, 0, "-", "embedding"], [14, 0, 0, "-", "prompt"], [15, 0, 0, "-", "retrieval"], [16, 0, 0, "-", "scraping"]], "ragger_duck.embedding": [[5, 1, 1, "", "SentenceTransformer"]], "ragger_duck.embedding.SentenceTransformer": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "fit_transform"], [5, 2, 1, "", "get_metadata_routing"], [5, 2, 1, "", "get_params"], [5, 2, 1, "", "set_output"], [5, 2, 1, "", "set_params"], [5, 2, 1, "", "transform"]], "ragger_duck.prompt": [[6, 1, 1, "", "BasicPromptingStrategy"]], "ragger_duck.prompt.BasicPromptingStrategy": [[6, 2, 1, "", "get_metadata_routing"], [6, 2, 1, "", "get_params"], [6, 2, 1, "", "set_params"]], "ragger_duck.retrieval": [[7, 1, 1, "", "BM25Retriever"], [8, 1, 1, "", "RetrieverReranker"], [9, 1, 1, "", "SemanticRetriever"]], "ragger_duck.retrieval.BM25Retriever": [[7, 2, 1, "", "fit"], [7, 2, 1, "", "get_metadata_routing"], [7, 2, 1, "", "get_params"], [7, 2, 1, "", "query"], [7, 2, 1, "", "set_params"]], "ragger_duck.retrieval.RetrieverReranker": [[8, 2, 1, "", "fit"], [8, 2, 1, "", "get_metadata_routing"], [8, 2, 1, "", "get_params"], [8, 2, 1, "", "query"], [8, 2, 1, "", "set_params"]], "ragger_duck.retrieval.SemanticRetriever": [[9, 2, 1, "", "fit"], [9, 2, 1, "", "get_metadata_routing"], [9, 2, 1, "", "get_params"], [9, 2, 1, "", "query"], [9, 2, 1, "", "set_params"]], "ragger_duck.scraping": [[10, 1, 1, "", "APINumPyDocExtractor"], [11, 1, 1, "", "GalleryExampleExtractor"], [12, 1, 1, "", "UserGuideDocExtractor"]], "ragger_duck.scraping.APINumPyDocExtractor": [[10, 2, 1, "", "fit"], [10, 2, 1, "", "fit_transform"], [10, 2, 1, "", "get_metadata_routing"], [10, 2, 1, "", "get_params"], [10, 2, 1, "", "set_output"], [10, 2, 1, "", "set_params"], [10, 2, 1, "", "transform"]], "ragger_duck.scraping.GalleryExampleExtractor": [[11, 2, 1, "", "fit"], [11, 2, 1, "", "fit_transform"], [11, 2, 1, "", "get_metadata_routing"], [11, 2, 1, "", "get_params"], [11, 2, 1, "", "set_output"], [11, 2, 1, "", "set_params"], [11, 2, 1, "", "transform"]], "ragger_duck.scraping.UserGuideDocExtractor": [[12, 2, 1, "", "fit"], [12, 2, 1, "", "fit_transform"], [12, 2, 1, "", "get_metadata_routing"], [12, 2, 1, "", "get_params"], [12, 2, 1, "", "set_output"], [12, 2, 1, "", "set_params"], [12, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"]}, "titleterms": {"about": 0, "u": 0, "exampl": 1, "ragger": [2, 3], "duck": [2, 3], "document": [2, 3, 16, 19, 20], "get": 3, "start": 3, "us": 3, "librari": 3, "deploi": 3, "clone": 3, "project": 3, "build": 3, "scikit": 3, "learn": 3, "train": 3, "semant": [3, 18], "lexic": [3, 18], "retriev": [3, 15, 18], "download": 3, "larg": 3, "languag": 3, "model": 3, "launch": 3, "web": 3, "consol": 3, "embed": 4, "sentencetransform": 5, "basicpromptingstrategi": 6, "bm25retriev": 7, "retrieverrerank": 8, "semanticretriev": 9, "apinumpydocextractor": 10, "galleryexampleextractor": 11, "userguidedocextractor": 12, "api": [13, 19, 20], "refer": 13, "prompt": [14, 19], "scrape": [16, 20], "user": [17, 20], "guid": [17, 20], "implement": 17, "detail": 17, "rerank": 18, "merg": 18, "text": 20, "releas": 21, "histori": 21, "version": [21, 22], "0": [21, 22], "1": [21, 22], "changelog": [21, 22]}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"About us": [[0, "about-us"]], "Examples": [[1, "examples"]], "Ragger Duck documentation": [[2, "ragger-duck-documentation"]], "Getting Started": [[3, "getting-started"]], "Use the Ragger Duck library": [[3, "use-the-ragger-duck-library"]], "Deploy Ragger Duck": [[3, "deploy-ragger-duck"]], "Cloning the project": [[3, "cloning-the-project"]], "Build the scikit-learn documentation": [[3, "build-the-scikit-learn-documentation"]], "Train the semantic and lexical retrievers": [[3, "train-the-semantic-and-lexical-retrievers"]], "Download the Large Language Model": [[3, "download-the-large-language-model"]], "Launch the Web Console": [[3, "launch-the-web-console"]], "Embedding": [[4, "module-ragger_duck.embedding"]], "SentenceTransformer": [[5, "sentencetransformer"]], "BasicPromptingStrategy": [[6, "basicpromptingstrategy"]], "BM25Retriever": [[7, "bm25retriever"]], "RetrieverReranker": [[8, "retrieverreranker"]], "SemanticRetriever": [[9, "semanticretriever"]], "APINumPyDocExtractor": [[10, "apinumpydocextractor"]], "GalleryExampleExtractor": [[11, "galleryexampleextractor"]], "UserGuideDocExtractor": [[12, "userguidedocextractor"]], "API reference": [[13, "api-reference"]], "Prompt": [[14, "module-ragger_duck.prompt"]], "Retrieval": [[15, "module-ragger_duck.retrieval"]], "Scraping the documentation": [[16, "module-ragger_duck.scraping"]], "User Guide": [[17, "user-guide"]], "Implementation details": [[17, "implementation-details"]], "Retriever": [[18, "retriever"]], "Lexical retrievers": [[18, "lexical-retrievers"]], "Semantical retrievers": [[18, "semantical-retrievers"]], "Reranker: merging lexical and semantical retrievers": [[18, "reranker-merging-lexical-and-semantical-retrievers"]], "Prompting": [[19, "prompting"]], "Prompting for API documentation": [[19, "prompting-for-api-documentation"]], "Text Scraping": [[20, "text-scraping"]], "API documentation": [[20, "api-documentation"]], "User Guide documentation": [[20, "user-guide-documentation"]], "Release history": [[21, "release-history"]], "Version 0.1": [[21, "version-0-1"], [22, "version-0-1"]], "Changelog": [[21, "changelog"], [22, "changelog"]]}, "indexentries": {"module": [[4, "module-ragger_duck.embedding"], [14, "module-ragger_duck.prompt"], [15, "module-ragger_duck.retrieval"], [16, "module-ragger_duck.scraping"]], "ragger_duck.embedding": [[4, "module-ragger_duck.embedding"]], "sentencetransformer (class in ragger_duck.embedding)": [[5, "ragger_duck.embedding.SentenceTransformer"]], "fit() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit"]], "fit_transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit_transform"]], "get_metadata_routing() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_metadata_routing"]], "get_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_params"]], "set_output() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_output"]], "set_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_params"]], "transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.transform"]], "basicpromptingstrategy (class in ragger_duck.prompt)": [[6, "ragger_duck.prompt.BasicPromptingStrategy"]], "get_metadata_routing() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_metadata_routing"]], "get_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_params"]], "set_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.set_params"]], "bm25retriever (class in ragger_duck.retrieval)": [[7, "ragger_duck.retrieval.BM25Retriever"]], "fit() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_params"]], "query() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.query"]], "set_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.set_params"]], "retrieverreranker (class in ragger_duck.retrieval)": [[8, "ragger_duck.retrieval.RetrieverReranker"]], "fit() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.fit"]], "get_metadata_routing() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_params"]], "query() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.query"]], "set_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.set_params"]], "semanticretriever (class in ragger_duck.retrieval)": [[9, "ragger_duck.retrieval.SemanticRetriever"]], "fit() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_params"]], "query() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.query"]], "set_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.set_params"]], "apinumpydocextractor (class in ragger_duck.scraping)": [[10, "ragger_duck.scraping.APINumPyDocExtractor"]], "fit() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_params"]], "transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.transform"]], "galleryexampleextractor (class in ragger_duck.scraping)": [[11, "ragger_duck.scraping.GalleryExampleExtractor"]], "fit() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit"]], "fit_transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_params"]], "set_output() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_output"]], "set_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_params"]], "transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.transform"]], "userguidedocextractor (class in ragger_duck.scraping)": [[12, "ragger_duck.scraping.UserGuideDocExtractor"]], "fit() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_params"]], "transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.transform"]], "ragger_duck.prompt": [[14, "module-ragger_duck.prompt"]], "ragger_duck.retrieval": [[15, "module-ragger_duck.retrieval"]], "ragger_duck.scraping": [[16, "module-ragger_duck.scraping"]]}})
\ No newline at end of file