From 2e31347d6781d7ecac942e5c6e60ece4e7bd94d8 Mon Sep 17 00:00:00 2001 From: glemaitre Date: Wed, 17 Apr 2024 09:09:15 +0000 Subject: [PATCH] [ci skip] DOC add info how to install pixi bd29d4e1014e7f427b7dd43b0e23bec3ae03af44 --- _sources/install.rst.txt | 6 +++++- install.html | 6 +++++- searchindex.js | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/_sources/install.rst.txt b/_sources/install.rst.txt index 03e6c2e..bb1ba8d 100644 --- a/_sources/install.rst.txt +++ b/_sources/install.rst.txt @@ -19,7 +19,10 @@ Deploy Ragger Duck ================== To ease the deployment, we rely on `pixi`. Refer to following -`link `_ for installing `pixi`. +`link `_ for installing `pixi` but in short, for the +currently supported platform, the following should be enough:: + + curl -fsSL https://pixi.sh/install.sh | bash In the latest stage, `pixi` will be in charge to create the Python environments to build the scikit-learn documentation, train the retrievers, and launch the Web Console. @@ -37,6 +40,7 @@ Note that you can modify the `pixi.toml` to create your own environments since t cuda version used in the `cuda-12-1` or `cuda-11-7` environment might not suits your needs. + Cloning the project ------------------- diff --git a/install.html b/install.html index c89a748..fa70d71 100644 --- a/install.html +++ b/install.html @@ -441,7 +441,11 @@

Use the Ragger Duck library

Deploy Ragger Duck#

To ease the deployment, we rely on pixi. Refer to following -link for installing pixi.

+link for installing pixi but in short, for the +currently supported platform, the following should be enough:

+
curl -fsSL https://pixi.sh/install.sh | bash
+
+

In the latest stage, pixi will be in charge to create the Python environments to build the scikit-learn documentation, train the retrievers, and launch the Web Console. We already setup several environments for you depending on the platform and hardware diff --git a/searchindex.js b/searchindex.js index ceec4db..2aa28d6 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["about", "auto_examples/index", "index", "install", "references/embedding", "references/generated/ragger_duck.embedding.SentenceTransformer", "references/generated/ragger_duck.prompt.BasicPromptingStrategy", "references/generated/ragger_duck.retrieval.BM25Retriever", "references/generated/ragger_duck.retrieval.RetrieverReranker", "references/generated/ragger_duck.retrieval.SemanticRetriever", "references/generated/ragger_duck.scraping.APINumPyDocExtractor", "references/generated/ragger_duck.scraping.GalleryExampleExtractor", "references/generated/ragger_duck.scraping.UserGuideDocExtractor", "references/index", "references/prompt", "references/retrieval", "references/scraping", "user_guide/index", "user_guide/information_retrieval", "user_guide/large_language_model", "user_guide/text_scraping", "whats_new", "whats_new/v0.1"], "filenames": ["about.rst", "auto_examples/index.rst", "index.rst", "install.rst", "references/embedding.rst", "references/generated/ragger_duck.embedding.SentenceTransformer.rst", "references/generated/ragger_duck.prompt.BasicPromptingStrategy.rst", "references/generated/ragger_duck.retrieval.BM25Retriever.rst", "references/generated/ragger_duck.retrieval.RetrieverReranker.rst", "references/generated/ragger_duck.retrieval.SemanticRetriever.rst", "references/generated/ragger_duck.scraping.APINumPyDocExtractor.rst", "references/generated/ragger_duck.scraping.GalleryExampleExtractor.rst", "references/generated/ragger_duck.scraping.UserGuideDocExtractor.rst", "references/index.rst", "references/prompt.rst", "references/retrieval.rst", "references/scraping.rst", "user_guide/index.rst", "user_guide/information_retrieval.rst", "user_guide/large_language_model.rst", "user_guide/text_scraping.rst", "whats_new.rst", "whats_new/v0.1.rst"], "titles": ["About us", "Examples", "Ragger Duck documentation", "Getting Started", "Embedding", "SentenceTransformer", "BasicPromptingStrategy", "BM25Retriever", "RetrieverReranker", "SemanticRetriever", "APINumPyDocExtractor", "GalleryExampleExtractor", "UserGuideDocExtractor", "API reference", "Prompt", "Retrieval", "Scraping the documentation", "User guide", "Retriever", "Prompting", "Text Scraping", "Release history", "Version 0.1"], "terms": {"thi": [0, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20], "i": [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20], "sundai": [0, 3], "afternoon": [0, 3], "project": [0, 2, 18], "One": 1, "dai": 1, "write": 1, "some": [1, 2, 3, 17, 20], "right": [1, 8], "now": [1, 3], "someth": 1, "locat": 1, "script": 1, "folder": [1, 10, 11, 12], "date": 2, "apr": 2, "17": 2, "2024": 2, "version": [2, 3, 5, 10, 11, 12], "us": [2, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "link": [2, 3], "sourc": [2, 3, 10, 11, 12], "repositori": [2, 3, 5], "issu": 2, "idea": [2, 18], "sklearn": [2, 3, 7], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "provid": [2, 3, 18, 20], "set": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "tool": 2, "build": [2, 9, 18, 20], "rag": [2, 3, 17], "answer": [2, 6, 19], "question": [2, 19], "about": 2, "scikit": [2, 5, 9, 16], "learn": [2, 5, 9, 16], "librari": [2, 9], "get": [2, 5, 6, 7, 8, 9, 10, 11, 12, 19], "start": 2, "check": [2, 5, 6, 7, 8, 9, 10, 11, 12], "out": 2, "guid": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "instal": [2, 3], "extra": 2, "inform": [2, 5, 6, 7, 8, 9, 10, 11, 12, 20], "new": [2, 5, 10, 11, 12], "contribut": 2, "also": [2, 3], "To": [2, 3], "guidelin": 2, "user": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "depth": 2, "kei": [2, 5], "concept": 2, "background": 2, "explan": 2, "api": [2, 3, 5, 9, 10, 11, 12, 17], "refer": [2, 3, 20], "contain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "detail": [2, 3], "descript": 2, "avail": [2, 18], "exampl": [2, 3, 5, 10, 11, 12], "galleri": [2, 3, 11], "good": 2, "place": 2, "see": [2, 5, 10, 11, 12], "action": 2, "select": [2, 3], "an": [2, 3, 5, 6, 9, 10, 11, 12, 18], "dive": 2, "sinc": 3, "yet": 3, "wai": [3, 20], "packag": [3, 13], "easi": 3, "dirti": 3, "add": 3, "your": 3, "path": [3, 5, 10, 11, 12], "moment": 3, "import": 3, "sy": 3, "path_to_packag": 3, "ragger_duck": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16], "append": 3, "eas": 3, "deploy": 3, "we": [3, 6, 8, 9, 18, 19, 20], "reli": 3, "pixi": 3, "follow": [3, 5, 8, 9], "In": [3, 18, 21, 22], "latest": 3, "stage": 3, "charg": 3, "creat": [3, 5, 9], "python": 3, "environ": 3, "alreadi": 3, "setup": 3, "sever": [3, 8], "you": [3, 8], "depend": 3, "platform": 3, "hardwar": 3, "dispos": 3, "cpu": [3, 5, 11, 12], "cross": [3, 8, 18], "e": [3, 5], "linux": 3, "maco": 3, "x86_64": 3, "arm64": 3, "mp": [3, 5], "m1": 3, "m2": 3, "m3": 3, "chip": 3, "cuda": [3, 5], "12": 3, "1": [3, 5, 7, 9, 10, 11, 12], "machin": 3, "gpu": [3, 5], "support": 3, "make": [3, 18], "experi": 3, "scalewai": 3, "instanc": [3, 5, 6, 7, 8, 9, 10, 11, 12], "l4": 3, "11": 3, "7": 3, "similar": [3, 9, 18], "instead": 3, "note": [3, 8], "can": [3, 5, 8, 10, 12, 20], "modifi": 3, "toml": 3, "own": 3, "might": 3, "suit": 3, "need": [3, 18], "github": 3, "self": [3, 5, 6, 7, 8, 9, 10, 11, 12], "all": [3, 8], "necessari": 3, "file": 3, "recurs": 3, "submodul": 3, "git": 3, "com": 3, "glemaitr": 3, "first": [3, 5], "html": [3, 12], "gener": 3, "page": [3, 12], "run": [3, 11, 12], "command": 3, "frozen": 3, "doc": 3, "have": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "differ": [3, 20], "each": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "type": [3, 18], "more": [3, 18, 20], "strategi": [3, 6, 19], "propos": 3, "specif": [3, 20], "specifi": 3, "llm": [3, 6, 19], "For": 3, "test": 3, "purpos": 3, "mistral": 3, "7b": 3, "fetch": 3, "ar": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "Then": [3, 18], "requir": 3, "which": 3, "want": [3, 8], "offload": 3, "access": 3, "address": 3, "http": 3, "localhost": 3, "8123": 3, "modul": [4, 5, 16, 20], "function": [4, 10, 12, 16, 20], "emb": [4, 5, 9], "transform": [4, 5, 7, 9, 10, 11, 12, 18], "allow": [4, 6], "text": [4, 5, 7, 10, 11, 12, 17], "class": [5, 6, 7, 8, 9, 10, 11, 12, 20], "embed": [5, 7, 9, 13, 18], "model_name_or_path": 5, "none": [5, 7, 8, 9, 10, 11, 12], "devic": 5, "cache_fold": 5, "use_auth_token": 5, "batch_siz": 5, "32": 5, "show_progress_bar": 5, "true": [5, 6, 7, 8, 9, 10, 11, 12], "sentenc": [5, 9, 18], "thin": 5, "wrapper": 5, "around": 5, "sentence_transform": [5, 8], "thu": 5, "insid": 5, "pipelin": [5, 6, 7, 8, 9, 10, 11, 12], "paramet": [5, 6, 7, 8, 9, 10, 11, 12, 20], "str": [5, 7, 8, 9, 12], "default": [5, 6, 7, 8, 9, 10, 11, 12], "If": [5, 6, 7, 8, 9, 10, 11, 12, 18], "filepath": 5, "disc": 5, "load": 5, "model": [5, 6, 18, 19], "from": [5, 7, 8, 9, 10, 11, 12, 18, 19, 20], "tri": 5, "download": [5, 18], "pre": [5, 18], "train": [5, 7, 8, 9, 18], "fail": 5, "construct": 5, "huggingfac": [5, 18], "name": [5, 6, 7, 8, 9, 10, 11, 12, 18], "iter": 5, "nn": 5, "custom": 5, "scratch": 5, "g": 5, "should": [5, 6], "comput": [5, 7, 8, 9], "store": 5, "bool": [5, 6, 7, 8, 9, 10, 11, 12], "authent": 5, "token": 5, "privat": 5, "int": [5, 7, 8, 9, 11, 12], "batch": 5, "size": [5, 11, 12, 20], "dure": [5, 18], "whether": [5, 6, 8], "show": 5, "progress": [5, 21, 22], "bar": 5, "method": [5, 6, 7, 8, 9, 10, 11, 12], "fit": [5, 7, 8, 9, 10, 11, 12], "x": [5, 7, 8, 9, 10, 11, 12], "y": [5, 7, 8, 9, 10, 11, 12], "No": [5, 10, 11, 12], "op": [5, 10, 11, 12], "oper": [5, 10, 11, 12], "onli": [5, 10, 11, 12, 20], "valid": [5, 10, 11, 12], "ignor": [5, 7, 8, 9, 10, 11, 12], "return": [5, 6, 7, 8, 9, 10, 11, 12], "estim": [5, 6, 7, 8, 9, 10, 11, 12, 20], "fit_transform": [5, 10, 11, 12], "fit_param": [5, 10, 11, 12], "data": [5, 7, 8, 9, 10, 11, 12], "option": [5, 10, 11, 12], "arrai": [5, 10, 11, 12], "like": [5, 10, 11, 12], "shape": [5, 7, 9, 10, 11, 12], "n_sampl": [5, 10, 11, 12], "n_featur": [5, 7, 9, 10, 11, 12], "input": [5, 7, 8, 9, 10, 11, 12], "sampl": [5, 10, 11, 12], "n_output": [5, 10, 11, 12], "target": [5, 10, 11, 12], "valu": [5, 6, 7, 8, 9, 10, 11, 12], "unsupervis": [5, 10, 11, 12], "dict": [5, 6, 7, 8, 9, 10, 11, 12], "addit": [5, 10, 11, 12], "x_new": [5, 10, 11, 12], "ndarrai": [5, 7, 9, 10, 11, 12], "n_features_new": [5, 10, 11, 12], "get_metadata_rout": [5, 6, 7, 8, 9, 10, 11, 12], "metadata": [5, 6, 7, 8, 9, 10, 11, 12], "rout": [5, 6, 7, 8, 9, 10, 11, 12], "object": [5, 6, 7, 8, 9, 10, 11, 12], "pleas": [5, 6, 7, 8, 9, 10, 11, 12], "how": [5, 6, 7, 8, 9, 10, 11, 12], "mechan": [5, 6, 7, 8, 9, 10, 11, 12], "work": [5, 6, 7, 8, 9, 10, 11, 12], "metadatarequest": [5, 6, 7, 8, 9, 10, 11, 12], "A": [5, 6, 7, 8, 9, 10, 11, 12], "encapsul": [5, 6, 7, 8, 9, 10, 11, 12], "get_param": [5, 6, 7, 8, 9, 10, 11, 12], "deep": [5, 6, 7, 8, 9, 10, 11, 12], "subobject": [5, 6, 7, 8, 9, 10, 11, 12], "param": [5, 6, 7, 8, 9, 10, 11, 12], "map": [5, 6, 7, 8, 9, 10, 11, 12], "set_output": [5, 10, 11, 12], "output": [5, 10, 11, 12], "sphx_glr_auto_examples_miscellaneous_plot_set_output": [5, 10, 11, 12], "py": [5, 10, 11, 12], "panda": [5, 10, 11, 12], "configur": [5, 10, 11, 12], "format": [5, 10, 11, 12], "datafram": [5, 10, 11, 12], "polar": [5, 10, 11, 12], "unchang": [5, 10, 11, 12], "4": [5, 10, 11, 12], "wa": [5, 10, 11, 12], "ad": [5, 10, 11, 12], "set_param": [5, 6, 7, 8, 9, 10, 11, 12], "simpl": [5, 6, 7, 8, 9, 10, 11, 12, 20], "well": [5, 6, 7, 8, 9, 10, 11, 12], "nest": [5, 6, 7, 8, 9, 10, 11, 12], "latter": [5, 6, 7, 8, 9, 10, 11, 12], "form": [5, 6, 7, 8, 9, 10, 11, 12], "compon": [5, 6, 7, 8, 9, 10, 11, 12], "__": [5, 6, 7, 8, 9, 10, 11, 12], "so": [5, 6, 7, 8, 9, 10, 11, 12], "": [5, 6, 7, 8, 9, 10, 11, 12], "possibl": [5, 6, 7, 8, 9, 10, 11, 12], "updat": [5, 6, 7, 8, 9, 10, 11, 12], "vector": [5, 7, 18], "length": 5, "n_sentenc": [5, 7, 9], "singl": 5, "list": [5, 7, 8, 9, 10, 11, 12], "dictionari": [5, 10, 11, 12], "embedding_s": 5, "prompt": [6, 13, 17], "retriev": [6, 7, 8, 9, 13, 17, 19], "use_retrieved_context": 6, "queri": [6, 7, 8, 9, 18], "onc": [6, 19], "context": [6, 8, 18, 19], "request": [6, 19], "languag": [6, 19], "expect": [6, 8, 9], "implement": [6, 8, 18, 19], "__call__": 6, "take": 6, "respons": 6, "It": [6, 10, 20], "instruct": 6, "base": [6, 7, 8], "befor": 6, "count_vector": 7, "top_k": [7, 9], "b": 7, "0": 7, "75": 7, "k1": 7, "6": 7, "k": [7, 9], "nearest": [7, 9, 18], "neighbor": [7, 9, 18], "lexic": [7, 8, 17, 19], "search": [7, 19], "bm25": 7, "count": 7, "term": 7, "document": [7, 8, 9, 10, 11, 12, 13, 17, 18], "feature_extract": 7, "countvector": [7, 18], "number": [7, 8, 9, 11, 12], "attribut": [7, 9, 11, 12], "x_fit_": [7, 9], "x_embedded_": [7, 9], "vocabulari": [7, 8, 18], "idf": [7, 8, 18], "most": [7, 8, 9], "relev": [7, 8, 9], "cross_encod": 8, "min_top_k": 8, "max_top_k": 8, "threshold": 8, "drop_dupl": 8, "hybrid": 8, "semant": [8, 9, 17, 19], "encod": [8, 18], "rerank": [8, 17, 19], "accept": 8, "case": [8, 18], "result": [8, 18], "crossencod": 8, "minimum": 8, "less": 8, "than": 8, "maximum": 8, "float": 8, "filter": 8, "score": [8, 18], "drop": 8, "duplic": 8, "step": 8, "done": 8, "after": 8, "index": 9, "faiss": [9, 18], "inner": 9, "product": 9, "cosin": 9, "mean": 9, "normal": 9, "scrape": [10, 11, 12, 13, 17], "extract": [10, 11, 12, 20], "numpydoc": [10, 20], "process": [10, 12], "templat": 10, "pathlib": [10, 11, 12], "chunk_siz": [11, 12], "300": [11, 12], "chunk_overlap": [11, 12], "50": [11, 12], "n_job": [11, 12], "chunk": [11, 12, 18, 20], "split": [11, 12], "overlap": [11, 12], "between": [11, 12, 18], "two": [11, 12, 18], "consecut": [11, 12], "job": [11, 12], "parallel": [11, 12], "core": [11, 12], "text_splitter_": [11, 12], "langchain": [11, 12], "text_splitt": [11, 12], "recursivecharactertextsplitt": [11, 12], "splitter": [11, 12], "folders_to_exclud": 12, "string": 12, "correspond": 12, "exclud": 12, "full": 13, "apinumpydocextractor": [13, 20], "galleryexampleextractor": 13, "userguidedocextractor": [13, 20], "sentencetransform": [13, 18], "bm25retriev": [13, 18], "retrieverrerank": [13, 18], "semanticretriev": [13, 18], "basicpromptingstrategi": [13, 19], "websit": [16, 20], "info": 17, "regard": 17, "merg": 17, "differenti": 18, "exact": 18, "match": 18, "tf": 18, "weight": 18, "scheme": 18, "seen": 18, "flexibl": 18, "space": 18, "databas": 18, "closest": 18, "given": 18, "approxim": 18, "algorithm": 18, "As": 18, "ani": [18, 20], "both": 18, "our": 18, "microsoft": 18, "bing": 18, "pair": 18, "larg": 19, "meaning": 20, "advanc": 20, "scraper": 20, "inde": 20, "pars": 20, "section": 20, "while": 20, "don": 20, "t": 20, "control": 20, "thei": 20, "alwai": 20, "hope": 20, "remov": 20, "ambigu": 20, "could": 20, "exist": 20, "when": 20, "without": 20, "webpag": 20, "addition": 20}, "objects": {"ragger_duck": [[4, 0, 0, "-", "embedding"], [14, 0, 0, "-", "prompt"], [15, 0, 0, "-", "retrieval"], [16, 0, 0, "-", "scraping"]], "ragger_duck.embedding": [[5, 1, 1, "", "SentenceTransformer"]], "ragger_duck.embedding.SentenceTransformer": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "fit_transform"], [5, 2, 1, "", "get_metadata_routing"], [5, 2, 1, "", "get_params"], [5, 2, 1, "", "set_output"], [5, 2, 1, "", "set_params"], [5, 2, 1, "", "transform"]], "ragger_duck.prompt": [[6, 1, 1, "", "BasicPromptingStrategy"]], "ragger_duck.prompt.BasicPromptingStrategy": [[6, 2, 1, "", "get_metadata_routing"], [6, 2, 1, "", "get_params"], [6, 2, 1, "", "set_params"]], "ragger_duck.retrieval": [[7, 1, 1, "", "BM25Retriever"], [8, 1, 1, "", "RetrieverReranker"], [9, 1, 1, "", "SemanticRetriever"]], "ragger_duck.retrieval.BM25Retriever": [[7, 2, 1, "", "fit"], [7, 2, 1, "", "get_metadata_routing"], [7, 2, 1, "", "get_params"], [7, 2, 1, "", "query"], [7, 2, 1, "", "set_params"]], "ragger_duck.retrieval.RetrieverReranker": [[8, 2, 1, "", "fit"], [8, 2, 1, "", "get_metadata_routing"], [8, 2, 1, "", "get_params"], [8, 2, 1, "", "query"], [8, 2, 1, "", "set_params"]], "ragger_duck.retrieval.SemanticRetriever": [[9, 2, 1, "", "fit"], [9, 2, 1, "", "get_metadata_routing"], [9, 2, 1, "", "get_params"], [9, 2, 1, "", "query"], [9, 2, 1, "", "set_params"]], "ragger_duck.scraping": [[10, 1, 1, "", "APINumPyDocExtractor"], [11, 1, 1, "", "GalleryExampleExtractor"], [12, 1, 1, "", "UserGuideDocExtractor"]], "ragger_duck.scraping.APINumPyDocExtractor": [[10, 2, 1, "", "fit"], [10, 2, 1, "", "fit_transform"], [10, 2, 1, "", "get_metadata_routing"], [10, 2, 1, "", "get_params"], [10, 2, 1, "", "set_output"], [10, 2, 1, "", "set_params"], [10, 2, 1, "", "transform"]], "ragger_duck.scraping.GalleryExampleExtractor": [[11, 2, 1, "", "fit"], [11, 2, 1, "", "fit_transform"], [11, 2, 1, "", "get_metadata_routing"], [11, 2, 1, "", "get_params"], [11, 2, 1, "", "set_output"], [11, 2, 1, "", "set_params"], [11, 2, 1, "", "transform"]], "ragger_duck.scraping.UserGuideDocExtractor": [[12, 2, 1, "", "fit"], [12, 2, 1, "", "fit_transform"], [12, 2, 1, "", "get_metadata_routing"], [12, 2, 1, "", "get_params"], [12, 2, 1, "", "set_output"], [12, 2, 1, "", "set_params"], [12, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"]}, "titleterms": {"about": 0, "u": 0, "exampl": 1, "ragger": [2, 3], "duck": [2, 3], "document": [2, 3, 16, 19, 20], "get": 3, "start": 3, "us": 3, "librari": 3, "deploi": 3, "clone": 3, "project": 3, "build": 3, "scikit": 3, "learn": 3, "train": 3, "semant": [3, 18], "lexic": [3, 18], "retriev": [3, 15, 18], "download": 3, "larg": 3, "languag": 3, "model": 3, "launch": 3, "web": 3, "consol": 3, "embed": 4, "sentencetransform": 5, "basicpromptingstrategi": 6, "bm25retriev": 7, "retrieverrerank": 8, "semanticretriev": 9, "apinumpydocextractor": 10, "galleryexampleextractor": 11, "userguidedocextractor": 12, "api": [13, 19, 20], "refer": 13, "prompt": [14, 19], "scrape": [16, 20], "user": [17, 20], "guid": [17, 20], "implement": 17, "detail": 17, "rerank": 18, "merg": 18, "text": 20, "releas": 21, "histori": 21, "version": [21, 22], "0": [21, 22], "1": [21, 22], "changelog": [21, 22]}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"About us": [[0, "about-us"]], "Examples": [[1, "examples"]], "Ragger Duck documentation": [[2, "ragger-duck-documentation"]], "Getting Started": [[3, "getting-started"]], "Use the Ragger Duck library": [[3, "use-the-ragger-duck-library"]], "Deploy Ragger Duck": [[3, "deploy-ragger-duck"]], "Cloning the project": [[3, "cloning-the-project"]], "Build the scikit-learn documentation": [[3, "build-the-scikit-learn-documentation"]], "Train the semantic and lexical retrievers": [[3, "train-the-semantic-and-lexical-retrievers"]], "Download the Large Language Model": [[3, "download-the-large-language-model"]], "Launch the Web Console": [[3, "launch-the-web-console"]], "Embedding": [[4, "module-ragger_duck.embedding"]], "SentenceTransformer": [[5, "sentencetransformer"]], "BasicPromptingStrategy": [[6, "basicpromptingstrategy"]], "BM25Retriever": [[7, "bm25retriever"]], "RetrieverReranker": [[8, "retrieverreranker"]], "SemanticRetriever": [[9, "semanticretriever"]], "APINumPyDocExtractor": [[10, "apinumpydocextractor"]], "GalleryExampleExtractor": [[11, "galleryexampleextractor"]], "UserGuideDocExtractor": [[12, "userguidedocextractor"]], "API reference": [[13, "api-reference"]], "Prompt": [[14, "module-ragger_duck.prompt"]], "Retrieval": [[15, "module-ragger_duck.retrieval"]], "Scraping the documentation": [[16, "module-ragger_duck.scraping"]], "User Guide": [[17, "user-guide"]], "Implementation details": [[17, "implementation-details"]], "Retriever": [[18, "retriever"]], "Lexical retrievers": [[18, "lexical-retrievers"]], "Semantical retrievers": [[18, "semantical-retrievers"]], "Reranker: merging lexical and semantical retrievers": [[18, "reranker-merging-lexical-and-semantical-retrievers"]], "Prompting": [[19, "prompting"]], "Prompting for API documentation": [[19, "prompting-for-api-documentation"]], "Text Scraping": [[20, "text-scraping"]], "API documentation": [[20, "api-documentation"]], "User Guide documentation": [[20, "user-guide-documentation"]], "Release history": [[21, "release-history"]], "Version 0.1": [[21, "version-0-1"], [22, "version-0-1"]], "Changelog": [[21, "changelog"], [22, "changelog"]]}, "indexentries": {"module": [[4, "module-ragger_duck.embedding"], [14, "module-ragger_duck.prompt"], [15, "module-ragger_duck.retrieval"], [16, "module-ragger_duck.scraping"]], "ragger_duck.embedding": [[4, "module-ragger_duck.embedding"]], "sentencetransformer (class in ragger_duck.embedding)": [[5, "ragger_duck.embedding.SentenceTransformer"]], "fit() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit"]], "fit_transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit_transform"]], "get_metadata_routing() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_metadata_routing"]], "get_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_params"]], "set_output() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_output"]], "set_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_params"]], "transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.transform"]], "basicpromptingstrategy (class in ragger_duck.prompt)": [[6, "ragger_duck.prompt.BasicPromptingStrategy"]], "get_metadata_routing() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_metadata_routing"]], "get_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_params"]], "set_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.set_params"]], "bm25retriever (class in ragger_duck.retrieval)": [[7, "ragger_duck.retrieval.BM25Retriever"]], "fit() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_params"]], "query() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.query"]], "set_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.set_params"]], "retrieverreranker (class in ragger_duck.retrieval)": [[8, "ragger_duck.retrieval.RetrieverReranker"]], "fit() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.fit"]], "get_metadata_routing() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_params"]], "query() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.query"]], "set_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.set_params"]], "semanticretriever (class in ragger_duck.retrieval)": [[9, "ragger_duck.retrieval.SemanticRetriever"]], "fit() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_params"]], "query() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.query"]], "set_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.set_params"]], "apinumpydocextractor (class in ragger_duck.scraping)": [[10, "ragger_duck.scraping.APINumPyDocExtractor"]], "fit() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_params"]], "transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.transform"]], "galleryexampleextractor (class in ragger_duck.scraping)": [[11, "ragger_duck.scraping.GalleryExampleExtractor"]], "fit() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit"]], "fit_transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_params"]], "set_output() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_output"]], "set_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_params"]], "transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.transform"]], "userguidedocextractor (class in ragger_duck.scraping)": [[12, "ragger_duck.scraping.UserGuideDocExtractor"]], "fit() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_params"]], "transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.transform"]], "ragger_duck.prompt": [[14, "module-ragger_duck.prompt"]], "ragger_duck.retrieval": [[15, "module-ragger_duck.retrieval"]], "ragger_duck.scraping": [[16, "module-ragger_duck.scraping"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["about", "auto_examples/index", "index", "install", "references/embedding", "references/generated/ragger_duck.embedding.SentenceTransformer", "references/generated/ragger_duck.prompt.BasicPromptingStrategy", "references/generated/ragger_duck.retrieval.BM25Retriever", "references/generated/ragger_duck.retrieval.RetrieverReranker", "references/generated/ragger_duck.retrieval.SemanticRetriever", "references/generated/ragger_duck.scraping.APINumPyDocExtractor", "references/generated/ragger_duck.scraping.GalleryExampleExtractor", "references/generated/ragger_duck.scraping.UserGuideDocExtractor", "references/index", "references/prompt", "references/retrieval", "references/scraping", "user_guide/index", "user_guide/information_retrieval", "user_guide/large_language_model", "user_guide/text_scraping", "whats_new", "whats_new/v0.1"], "filenames": ["about.rst", "auto_examples/index.rst", "index.rst", "install.rst", "references/embedding.rst", "references/generated/ragger_duck.embedding.SentenceTransformer.rst", "references/generated/ragger_duck.prompt.BasicPromptingStrategy.rst", "references/generated/ragger_duck.retrieval.BM25Retriever.rst", "references/generated/ragger_duck.retrieval.RetrieverReranker.rst", "references/generated/ragger_duck.retrieval.SemanticRetriever.rst", "references/generated/ragger_duck.scraping.APINumPyDocExtractor.rst", "references/generated/ragger_duck.scraping.GalleryExampleExtractor.rst", "references/generated/ragger_duck.scraping.UserGuideDocExtractor.rst", "references/index.rst", "references/prompt.rst", "references/retrieval.rst", "references/scraping.rst", "user_guide/index.rst", "user_guide/information_retrieval.rst", "user_guide/large_language_model.rst", "user_guide/text_scraping.rst", "whats_new.rst", "whats_new/v0.1.rst"], "titles": ["About us", "Examples", "Ragger Duck documentation", "Getting Started", "Embedding", "SentenceTransformer", "BasicPromptingStrategy", "BM25Retriever", "RetrieverReranker", "SemanticRetriever", "APINumPyDocExtractor", "GalleryExampleExtractor", "UserGuideDocExtractor", "API reference", "Prompt", "Retrieval", "Scraping the documentation", "User guide", "Retriever", "Prompting", "Text Scraping", "Release history", "Version 0.1"], "terms": {"thi": [0, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20], "i": [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20], "sundai": [0, 3], "afternoon": [0, 3], "project": [0, 2, 18], "One": 1, "dai": 1, "write": 1, "some": [1, 2, 3, 17, 20], "right": [1, 8], "now": [1, 3], "someth": 1, "locat": 1, "script": 1, "folder": [1, 10, 11, 12], "date": 2, "apr": 2, "17": 2, "2024": 2, "version": [2, 3, 5, 10, 11, 12], "us": [2, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "link": [2, 3], "sourc": [2, 3, 10, 11, 12], "repositori": [2, 3, 5], "issu": 2, "idea": [2, 18], "sklearn": [2, 3, 7], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "provid": [2, 3, 18, 20], "set": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "tool": 2, "build": [2, 9, 18, 20], "rag": [2, 3, 17], "answer": [2, 6, 19], "question": [2, 19], "about": 2, "scikit": [2, 5, 9, 16], "learn": [2, 5, 9, 16], "librari": [2, 9], "get": [2, 5, 6, 7, 8, 9, 10, 11, 12, 19], "start": 2, "check": [2, 5, 6, 7, 8, 9, 10, 11, 12], "out": 2, "guid": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "instal": [2, 3], "extra": 2, "inform": [2, 5, 6, 7, 8, 9, 10, 11, 12, 20], "new": [2, 5, 10, 11, 12], "contribut": 2, "also": [2, 3], "To": [2, 3], "guidelin": 2, "user": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "depth": 2, "kei": [2, 5], "concept": 2, "background": 2, "explan": 2, "api": [2, 3, 5, 9, 10, 11, 12, 17], "refer": [2, 3, 20], "contain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 20], "detail": [2, 3], "descript": 2, "avail": [2, 18], "exampl": [2, 3, 5, 10, 11, 12], "galleri": [2, 3, 11], "good": 2, "place": 2, "see": [2, 5, 10, 11, 12], "action": 2, "select": [2, 3], "an": [2, 3, 5, 6, 9, 10, 11, 12, 18], "dive": 2, "sinc": 3, "yet": 3, "wai": [3, 20], "packag": [3, 13], "easi": 3, "dirti": 3, "add": 3, "your": 3, "path": [3, 5, 10, 11, 12], "moment": 3, "import": 3, "sy": 3, "path_to_packag": 3, "ragger_duck": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16], "append": 3, "eas": 3, "deploy": 3, "we": [3, 6, 8, 9, 18, 19, 20], "reli": 3, "pixi": 3, "follow": [3, 5, 8, 9], "short": 3, "current": 3, "support": 3, "platform": 3, "should": [3, 5, 6], "enough": 3, "curl": 3, "fssl": 3, "http": 3, "sh": 3, "bash": 3, "In": [3, 18, 21, 22], "latest": 3, "stage": 3, "charg": 3, "creat": [3, 5, 9], "python": 3, "environ": 3, "alreadi": 3, "setup": 3, "sever": [3, 8], "you": [3, 8], "depend": 3, "hardwar": 3, "dispos": 3, "cpu": [3, 5, 11, 12], "cross": [3, 8, 18], "e": [3, 5], "linux": 3, "maco": 3, "x86_64": 3, "arm64": 3, "mp": [3, 5], "m1": 3, "m2": 3, "m3": 3, "chip": 3, "cuda": [3, 5], "12": 3, "1": [3, 5, 7, 9, 10, 11, 12], "machin": 3, "gpu": [3, 5], "make": [3, 18], "experi": 3, "scalewai": 3, "instanc": [3, 5, 6, 7, 8, 9, 10, 11, 12], "l4": 3, "11": 3, "7": 3, "similar": [3, 9, 18], "instead": 3, "note": [3, 8], "can": [3, 5, 8, 10, 12, 20], "modifi": 3, "toml": 3, "own": 3, "might": 3, "suit": 3, "need": [3, 18], "github": 3, "self": [3, 5, 6, 7, 8, 9, 10, 11, 12], "all": [3, 8], "necessari": 3, "file": 3, "recurs": 3, "submodul": 3, "git": 3, "com": 3, "glemaitr": 3, "first": [3, 5], "html": [3, 12], "gener": 3, "page": [3, 12], "run": [3, 11, 12], "command": 3, "frozen": 3, "doc": 3, "have": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "differ": [3, 20], "each": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "type": [3, 18], "more": [3, 18, 20], "strategi": [3, 6, 19], "propos": 3, "specif": [3, 20], "specifi": 3, "llm": [3, 6, 19], "For": 3, "test": 3, "purpos": 3, "mistral": 3, "7b": 3, "fetch": 3, "ar": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18, 20], "Then": [3, 18], "requir": 3, "which": 3, "want": [3, 8], "offload": 3, "access": 3, "address": 3, "localhost": 3, "8123": 3, "modul": [4, 5, 16, 20], "function": [4, 10, 12, 16, 20], "emb": [4, 5, 9], "transform": [4, 5, 7, 9, 10, 11, 12, 18], "allow": [4, 6], "text": [4, 5, 7, 10, 11, 12, 17], "class": [5, 6, 7, 8, 9, 10, 11, 12, 20], "embed": [5, 7, 9, 13, 18], "model_name_or_path": 5, "none": [5, 7, 8, 9, 10, 11, 12], "devic": 5, "cache_fold": 5, "use_auth_token": 5, "batch_siz": 5, "32": 5, "show_progress_bar": 5, "true": [5, 6, 7, 8, 9, 10, 11, 12], "sentenc": [5, 9, 18], "thin": 5, "wrapper": 5, "around": 5, "sentence_transform": [5, 8], "thu": 5, "insid": 5, "pipelin": [5, 6, 7, 8, 9, 10, 11, 12], "paramet": [5, 6, 7, 8, 9, 10, 11, 12, 20], "str": [5, 7, 8, 9, 12], "default": [5, 6, 7, 8, 9, 10, 11, 12], "If": [5, 6, 7, 8, 9, 10, 11, 12, 18], "filepath": 5, "disc": 5, "load": 5, "model": [5, 6, 18, 19], "from": [5, 7, 8, 9, 10, 11, 12, 18, 19, 20], "tri": 5, "download": [5, 18], "pre": [5, 18], "train": [5, 7, 8, 9, 18], "fail": 5, "construct": 5, "huggingfac": [5, 18], "name": [5, 6, 7, 8, 9, 10, 11, 12, 18], "iter": 5, "nn": 5, "custom": 5, "scratch": 5, "g": 5, "comput": [5, 7, 8, 9], "store": 5, "bool": [5, 6, 7, 8, 9, 10, 11, 12], "authent": 5, "token": 5, "privat": 5, "int": [5, 7, 8, 9, 11, 12], "batch": 5, "size": [5, 11, 12, 20], "dure": [5, 18], "whether": [5, 6, 8], "show": 5, "progress": [5, 21, 22], "bar": 5, "method": [5, 6, 7, 8, 9, 10, 11, 12], "fit": [5, 7, 8, 9, 10, 11, 12], "x": [5, 7, 8, 9, 10, 11, 12], "y": [5, 7, 8, 9, 10, 11, 12], "No": [5, 10, 11, 12], "op": [5, 10, 11, 12], "oper": [5, 10, 11, 12], "onli": [5, 10, 11, 12, 20], "valid": [5, 10, 11, 12], "ignor": [5, 7, 8, 9, 10, 11, 12], "return": [5, 6, 7, 8, 9, 10, 11, 12], "estim": [5, 6, 7, 8, 9, 10, 11, 12, 20], "fit_transform": [5, 10, 11, 12], "fit_param": [5, 10, 11, 12], "data": [5, 7, 8, 9, 10, 11, 12], "option": [5, 10, 11, 12], "arrai": [5, 10, 11, 12], "like": [5, 10, 11, 12], "shape": [5, 7, 9, 10, 11, 12], "n_sampl": [5, 10, 11, 12], "n_featur": [5, 7, 9, 10, 11, 12], "input": [5, 7, 8, 9, 10, 11, 12], "sampl": [5, 10, 11, 12], "n_output": [5, 10, 11, 12], "target": [5, 10, 11, 12], "valu": [5, 6, 7, 8, 9, 10, 11, 12], "unsupervis": [5, 10, 11, 12], "dict": [5, 6, 7, 8, 9, 10, 11, 12], "addit": [5, 10, 11, 12], "x_new": [5, 10, 11, 12], "ndarrai": [5, 7, 9, 10, 11, 12], "n_features_new": [5, 10, 11, 12], "get_metadata_rout": [5, 6, 7, 8, 9, 10, 11, 12], "metadata": [5, 6, 7, 8, 9, 10, 11, 12], "rout": [5, 6, 7, 8, 9, 10, 11, 12], "object": [5, 6, 7, 8, 9, 10, 11, 12], "pleas": [5, 6, 7, 8, 9, 10, 11, 12], "how": [5, 6, 7, 8, 9, 10, 11, 12], "mechan": [5, 6, 7, 8, 9, 10, 11, 12], "work": [5, 6, 7, 8, 9, 10, 11, 12], "metadatarequest": [5, 6, 7, 8, 9, 10, 11, 12], "A": [5, 6, 7, 8, 9, 10, 11, 12], "encapsul": [5, 6, 7, 8, 9, 10, 11, 12], "get_param": [5, 6, 7, 8, 9, 10, 11, 12], "deep": [5, 6, 7, 8, 9, 10, 11, 12], "subobject": [5, 6, 7, 8, 9, 10, 11, 12], "param": [5, 6, 7, 8, 9, 10, 11, 12], "map": [5, 6, 7, 8, 9, 10, 11, 12], "set_output": [5, 10, 11, 12], "output": [5, 10, 11, 12], "sphx_glr_auto_examples_miscellaneous_plot_set_output": [5, 10, 11, 12], "py": [5, 10, 11, 12], "panda": [5, 10, 11, 12], "configur": [5, 10, 11, 12], "format": [5, 10, 11, 12], "datafram": [5, 10, 11, 12], "polar": [5, 10, 11, 12], "unchang": [5, 10, 11, 12], "4": [5, 10, 11, 12], "wa": [5, 10, 11, 12], "ad": [5, 10, 11, 12], "set_param": [5, 6, 7, 8, 9, 10, 11, 12], "simpl": [5, 6, 7, 8, 9, 10, 11, 12, 20], "well": [5, 6, 7, 8, 9, 10, 11, 12], "nest": [5, 6, 7, 8, 9, 10, 11, 12], "latter": [5, 6, 7, 8, 9, 10, 11, 12], "form": [5, 6, 7, 8, 9, 10, 11, 12], "compon": [5, 6, 7, 8, 9, 10, 11, 12], "__": [5, 6, 7, 8, 9, 10, 11, 12], "so": [5, 6, 7, 8, 9, 10, 11, 12], "": [5, 6, 7, 8, 9, 10, 11, 12], "possibl": [5, 6, 7, 8, 9, 10, 11, 12], "updat": [5, 6, 7, 8, 9, 10, 11, 12], "vector": [5, 7, 18], "length": 5, "n_sentenc": [5, 7, 9], "singl": 5, "list": [5, 7, 8, 9, 10, 11, 12], "dictionari": [5, 10, 11, 12], "embedding_s": 5, "prompt": [6, 13, 17], "retriev": [6, 7, 8, 9, 13, 17, 19], "use_retrieved_context": 6, "queri": [6, 7, 8, 9, 18], "onc": [6, 19], "context": [6, 8, 18, 19], "request": [6, 19], "languag": [6, 19], "expect": [6, 8, 9], "implement": [6, 8, 18, 19], "__call__": 6, "take": 6, "respons": 6, "It": [6, 10, 20], "instruct": 6, "base": [6, 7, 8], "befor": 6, "count_vector": 7, "top_k": [7, 9], "b": 7, "0": 7, "75": 7, "k1": 7, "6": 7, "k": [7, 9], "nearest": [7, 9, 18], "neighbor": [7, 9, 18], "lexic": [7, 8, 17, 19], "search": [7, 19], "bm25": 7, "count": 7, "term": 7, "document": [7, 8, 9, 10, 11, 12, 13, 17, 18], "feature_extract": 7, "countvector": [7, 18], "number": [7, 8, 9, 11, 12], "attribut": [7, 9, 11, 12], "x_fit_": [7, 9], "x_embedded_": [7, 9], "vocabulari": [7, 8, 18], "idf": [7, 8, 18], "most": [7, 8, 9], "relev": [7, 8, 9], "cross_encod": 8, "min_top_k": 8, "max_top_k": 8, "threshold": 8, "drop_dupl": 8, "hybrid": 8, "semant": [8, 9, 17, 19], "encod": [8, 18], "rerank": [8, 17, 19], "accept": 8, "case": [8, 18], "result": [8, 18], "crossencod": 8, "minimum": 8, "less": 8, "than": 8, "maximum": 8, "float": 8, "filter": 8, "score": [8, 18], "drop": 8, "duplic": 8, "step": 8, "done": 8, "after": 8, "index": 9, "faiss": [9, 18], "inner": 9, "product": 9, "cosin": 9, "mean": 9, "normal": 9, "scrape": [10, 11, 12, 13, 17], "extract": [10, 11, 12, 20], "numpydoc": [10, 20], "process": [10, 12], "templat": 10, "pathlib": [10, 11, 12], "chunk_siz": [11, 12], "300": [11, 12], "chunk_overlap": [11, 12], "50": [11, 12], "n_job": [11, 12], "chunk": [11, 12, 18, 20], "split": [11, 12], "overlap": [11, 12], "between": [11, 12, 18], "two": [11, 12, 18], "consecut": [11, 12], "job": [11, 12], "parallel": [11, 12], "core": [11, 12], "text_splitter_": [11, 12], "langchain": [11, 12], "text_splitt": [11, 12], "recursivecharactertextsplitt": [11, 12], "splitter": [11, 12], "folders_to_exclud": 12, "string": 12, "correspond": 12, "exclud": 12, "full": 13, "apinumpydocextractor": [13, 20], "galleryexampleextractor": 13, "userguidedocextractor": [13, 20], "sentencetransform": [13, 18], "bm25retriev": [13, 18], "retrieverrerank": [13, 18], "semanticretriev": [13, 18], "basicpromptingstrategi": [13, 19], "websit": [16, 20], "info": 17, "regard": 17, "merg": 17, "differenti": 18, "exact": 18, "match": 18, "tf": 18, "weight": 18, "scheme": 18, "seen": 18, "flexibl": 18, "space": 18, "databas": 18, "closest": 18, "given": 18, "approxim": 18, "algorithm": 18, "As": 18, "ani": [18, 20], "both": 18, "our": 18, "microsoft": 18, "bing": 18, "pair": 18, "larg": 19, "meaning": 20, "advanc": 20, "scraper": 20, "inde": 20, "pars": 20, "section": 20, "while": 20, "don": 20, "t": 20, "control": 20, "thei": 20, "alwai": 20, "hope": 20, "remov": 20, "ambigu": 20, "could": 20, "exist": 20, "when": 20, "without": 20, "webpag": 20, "addition": 20}, "objects": {"ragger_duck": [[4, 0, 0, "-", "embedding"], [14, 0, 0, "-", "prompt"], [15, 0, 0, "-", "retrieval"], [16, 0, 0, "-", "scraping"]], "ragger_duck.embedding": [[5, 1, 1, "", "SentenceTransformer"]], "ragger_duck.embedding.SentenceTransformer": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "fit_transform"], [5, 2, 1, "", "get_metadata_routing"], [5, 2, 1, "", "get_params"], [5, 2, 1, "", "set_output"], [5, 2, 1, "", "set_params"], [5, 2, 1, "", "transform"]], "ragger_duck.prompt": [[6, 1, 1, "", "BasicPromptingStrategy"]], "ragger_duck.prompt.BasicPromptingStrategy": [[6, 2, 1, "", "get_metadata_routing"], [6, 2, 1, "", "get_params"], [6, 2, 1, "", "set_params"]], "ragger_duck.retrieval": [[7, 1, 1, "", "BM25Retriever"], [8, 1, 1, "", "RetrieverReranker"], [9, 1, 1, "", "SemanticRetriever"]], "ragger_duck.retrieval.BM25Retriever": [[7, 2, 1, "", "fit"], [7, 2, 1, "", "get_metadata_routing"], [7, 2, 1, "", "get_params"], [7, 2, 1, "", "query"], [7, 2, 1, "", "set_params"]], "ragger_duck.retrieval.RetrieverReranker": [[8, 2, 1, "", "fit"], [8, 2, 1, "", "get_metadata_routing"], [8, 2, 1, "", "get_params"], [8, 2, 1, "", "query"], [8, 2, 1, "", "set_params"]], "ragger_duck.retrieval.SemanticRetriever": [[9, 2, 1, "", "fit"], [9, 2, 1, "", "get_metadata_routing"], [9, 2, 1, "", "get_params"], [9, 2, 1, "", "query"], [9, 2, 1, "", "set_params"]], "ragger_duck.scraping": [[10, 1, 1, "", "APINumPyDocExtractor"], [11, 1, 1, "", "GalleryExampleExtractor"], [12, 1, 1, "", "UserGuideDocExtractor"]], "ragger_duck.scraping.APINumPyDocExtractor": [[10, 2, 1, "", "fit"], [10, 2, 1, "", "fit_transform"], [10, 2, 1, "", "get_metadata_routing"], [10, 2, 1, "", "get_params"], [10, 2, 1, "", "set_output"], [10, 2, 1, "", "set_params"], [10, 2, 1, "", "transform"]], "ragger_duck.scraping.GalleryExampleExtractor": [[11, 2, 1, "", "fit"], [11, 2, 1, "", "fit_transform"], [11, 2, 1, "", "get_metadata_routing"], [11, 2, 1, "", "get_params"], [11, 2, 1, "", "set_output"], [11, 2, 1, "", "set_params"], [11, 2, 1, "", "transform"]], "ragger_duck.scraping.UserGuideDocExtractor": [[12, 2, 1, "", "fit"], [12, 2, 1, "", "fit_transform"], [12, 2, 1, "", "get_metadata_routing"], [12, 2, 1, "", "get_params"], [12, 2, 1, "", "set_output"], [12, 2, 1, "", "set_params"], [12, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"]}, "titleterms": {"about": 0, "u": 0, "exampl": 1, "ragger": [2, 3], "duck": [2, 3], "document": [2, 3, 16, 19, 20], "get": 3, "start": 3, "us": 3, "librari": 3, "deploi": 3, "clone": 3, "project": 3, "build": 3, "scikit": 3, "learn": 3, "train": 3, "semant": [3, 18], "lexic": [3, 18], "retriev": [3, 15, 18], "download": 3, "larg": 3, "languag": 3, "model": 3, "launch": 3, "web": 3, "consol": 3, "embed": 4, "sentencetransform": 5, "basicpromptingstrategi": 6, "bm25retriev": 7, "retrieverrerank": 8, "semanticretriev": 9, "apinumpydocextractor": 10, "galleryexampleextractor": 11, "userguidedocextractor": 12, "api": [13, 19, 20], "refer": 13, "prompt": [14, 19], "scrape": [16, 20], "user": [17, 20], "guid": [17, 20], "implement": 17, "detail": 17, "rerank": 18, "merg": 18, "text": 20, "releas": 21, "histori": 21, "version": [21, 22], "0": [21, 22], "1": [21, 22], "changelog": [21, 22]}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"About us": [[0, "about-us"]], "Examples": [[1, "examples"]], "Ragger Duck documentation": [[2, "ragger-duck-documentation"]], "Getting Started": [[3, "getting-started"]], "Use the Ragger Duck library": [[3, "use-the-ragger-duck-library"]], "Deploy Ragger Duck": [[3, "deploy-ragger-duck"]], "Cloning the project": [[3, "cloning-the-project"]], "Build the scikit-learn documentation": [[3, "build-the-scikit-learn-documentation"]], "Train the semantic and lexical retrievers": [[3, "train-the-semantic-and-lexical-retrievers"]], "Download the Large Language Model": [[3, "download-the-large-language-model"]], "Launch the Web Console": [[3, "launch-the-web-console"]], "Embedding": [[4, "module-ragger_duck.embedding"]], "SentenceTransformer": [[5, "sentencetransformer"]], "BasicPromptingStrategy": [[6, "basicpromptingstrategy"]], "BM25Retriever": [[7, "bm25retriever"]], "RetrieverReranker": [[8, "retrieverreranker"]], "SemanticRetriever": [[9, "semanticretriever"]], "APINumPyDocExtractor": [[10, "apinumpydocextractor"]], "GalleryExampleExtractor": [[11, "galleryexampleextractor"]], "UserGuideDocExtractor": [[12, "userguidedocextractor"]], "API reference": [[13, "api-reference"]], "Prompt": [[14, "module-ragger_duck.prompt"]], "Retrieval": [[15, "module-ragger_duck.retrieval"]], "Scraping the documentation": [[16, "module-ragger_duck.scraping"]], "User Guide": [[17, "user-guide"]], "Implementation details": [[17, "implementation-details"]], "Retriever": [[18, "retriever"]], "Lexical retrievers": [[18, "lexical-retrievers"]], "Semantical retrievers": [[18, "semantical-retrievers"]], "Reranker: merging lexical and semantical retrievers": [[18, "reranker-merging-lexical-and-semantical-retrievers"]], "Prompting": [[19, "prompting"]], "Prompting for API documentation": [[19, "prompting-for-api-documentation"]], "Text Scraping": [[20, "text-scraping"]], "API documentation": [[20, "api-documentation"]], "User Guide documentation": [[20, "user-guide-documentation"]], "Release history": [[21, "release-history"]], "Version 0.1": [[21, "version-0-1"], [22, "version-0-1"]], "Changelog": [[21, "changelog"], [22, "changelog"]]}, "indexentries": {"module": [[4, "module-ragger_duck.embedding"], [14, "module-ragger_duck.prompt"], [15, "module-ragger_duck.retrieval"], [16, "module-ragger_duck.scraping"]], "ragger_duck.embedding": [[4, "module-ragger_duck.embedding"]], "sentencetransformer (class in ragger_duck.embedding)": [[5, "ragger_duck.embedding.SentenceTransformer"]], "fit() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit"]], "fit_transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit_transform"]], "get_metadata_routing() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_metadata_routing"]], "get_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_params"]], "set_output() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_output"]], "set_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_params"]], "transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.transform"]], "basicpromptingstrategy (class in ragger_duck.prompt)": [[6, "ragger_duck.prompt.BasicPromptingStrategy"]], "get_metadata_routing() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_metadata_routing"]], "get_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_params"]], "set_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.set_params"]], "bm25retriever (class in ragger_duck.retrieval)": [[7, "ragger_duck.retrieval.BM25Retriever"]], "fit() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_params"]], "query() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.query"]], "set_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.set_params"]], "retrieverreranker (class in ragger_duck.retrieval)": [[8, "ragger_duck.retrieval.RetrieverReranker"]], "fit() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.fit"]], "get_metadata_routing() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_params"]], "query() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.query"]], "set_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.set_params"]], "semanticretriever (class in ragger_duck.retrieval)": [[9, "ragger_duck.retrieval.SemanticRetriever"]], "fit() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_params"]], "query() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.query"]], "set_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.set_params"]], "apinumpydocextractor (class in ragger_duck.scraping)": [[10, "ragger_duck.scraping.APINumPyDocExtractor"]], "fit() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_params"]], "transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.transform"]], "galleryexampleextractor (class in ragger_duck.scraping)": [[11, "ragger_duck.scraping.GalleryExampleExtractor"]], "fit() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit"]], "fit_transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_params"]], "set_output() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_output"]], "set_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_params"]], "transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.transform"]], "userguidedocextractor (class in ragger_duck.scraping)": [[12, "ragger_duck.scraping.UserGuideDocExtractor"]], "fit() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_params"]], "transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.transform"]], "ragger_duck.prompt": [[14, "module-ragger_duck.prompt"]], "ragger_duck.retrieval": [[15, "module-ragger_duck.retrieval"]], "ragger_duck.scraping": [[16, "module-ragger_duck.scraping"]]}}) \ No newline at end of file