Date: Apr 18, 2024 Version:
+Date: Apr 19, 2024 Version:
Useful links: Source Repository | Issues & Ideas |
diff --git a/searchindex.js b/searchindex.js index 12571bf..9364a4b 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["about", "auto_examples/index", "index", "install", "references/embedding", "references/generated/ragger_duck.embedding.SentenceTransformer", "references/generated/ragger_duck.prompt.BasicPromptingStrategy", "references/generated/ragger_duck.retrieval.BM25Retriever", "references/generated/ragger_duck.retrieval.RetrieverReranker", "references/generated/ragger_duck.retrieval.SemanticRetriever", "references/generated/ragger_duck.scraping.APINumPyDocExtractor", "references/generated/ragger_duck.scraping.GalleryExampleExtractor", "references/generated/ragger_duck.scraping.UserGuideDocExtractor", "references/index", "references/prompt", "references/retrieval", "references/scraping", "user_guide/index", "user_guide/information_retrieval", "user_guide/large_language_model", "user_guide/text_scraping", "whats_new", "whats_new/v0.1"], "filenames": ["about.rst", "auto_examples/index.rst", "index.rst", "install.rst", "references/embedding.rst", "references/generated/ragger_duck.embedding.SentenceTransformer.rst", "references/generated/ragger_duck.prompt.BasicPromptingStrategy.rst", "references/generated/ragger_duck.retrieval.BM25Retriever.rst", "references/generated/ragger_duck.retrieval.RetrieverReranker.rst", "references/generated/ragger_duck.retrieval.SemanticRetriever.rst", "references/generated/ragger_duck.scraping.APINumPyDocExtractor.rst", "references/generated/ragger_duck.scraping.GalleryExampleExtractor.rst", "references/generated/ragger_duck.scraping.UserGuideDocExtractor.rst", "references/index.rst", "references/prompt.rst", "references/retrieval.rst", "references/scraping.rst", "user_guide/index.rst", "user_guide/information_retrieval.rst", "user_guide/large_language_model.rst", "user_guide/text_scraping.rst", "whats_new.rst", "whats_new/v0.1.rst"], "titles": ["About us", "Examples", "Ragger Duck documentation", "Getting Started", "Embedding", "SentenceTransformer", "BasicPromptingStrategy", "BM25Retriever", "RetrieverReranker", "SemanticRetriever", "APINumPyDocExtractor", "GalleryExampleExtractor", "UserGuideDocExtractor", "API reference", "Prompt", "Retrieval", "Scraping the documentation", "User guide", "Retriever", "Prompting", "Text Scraping", "Release history", "Version 0.1"], "terms": {"thi": [0, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 20], "i": [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20], "sundai": 0, "afternoon": 0, "project": [0, 2, 17, 18], "One": 1, "dai": 1, "write": 1, "some": [1, 2, 3, 17, 20], "right": [1, 8], "now": [1, 3, 17], "someth": 1, "locat": 1, "script": 1, "folder": [1, 10, 11, 12], "date": 2, "apr": 2, "18": 2, "2024": 2, "version": [2, 3, 5, 10, 11, 12], "us": [2, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 20], "link": [2, 3], "sourc": [2, 3, 10, 11, 12, 17], "repositori": [2, 3, 5], "issu": 2, "idea": [2, 17, 18], "sklearn": [2, 3, 7], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 20], "provid": [2, 3, 17, 18, 20], "set": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 17], "tool": 2, "build": [2, 9, 17, 18, 20], "rag": [2, 3, 17], "answer": [2, 6, 17, 19], "question": [2, 17, 19], "about": 2, "scikit": [2, 5, 9, 16], "learn": [2, 5, 9, 16], "librari": [2, 9], "get": [2, 5, 6, 7, 8, 9, 10, 11, 12, 17, 19], "start": 2, "check": [2, 5, 6, 7, 8, 9, 10, 11, 12], "out": 2, "guid": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "instal": [2, 3], "extra": 2, "inform": [2, 5, 6, 7, 8, 9, 10, 11, 12, 20], "new": [2, 5, 10, 11, 12, 17], "contribut": 2, "also": [2, 3, 17], "To": [2, 3], "guidelin": 2, "user": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "depth": [2, 17], "kei": [2, 5], "concept": 2, "background": 2, "explan": 2, "api": [2, 3, 5, 9, 10, 11, 12, 17], "refer": [2, 3, 20], "contain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 20], "detail": [2, 3], "descript": 2, "avail": [2, 18], "exampl": [2, 3, 5, 10, 11, 12], "galleri": [2, 3, 11], "good": 2, "place": 2, "see": [2, 5, 10, 11, 12], "action": 2, "select": [2, 3], "an": [2, 3, 5, 6, 9, 10, 11, 12, 17, 18], "dive": 2, "eas": 3, "deploy": 3, "we": [3, 6, 8, 9, 17, 18, 19, 20], "reli": 3, "pixi": 3, "follow": [3, 5, 8, 9, 17], "short": 3, "current": 3, "support": 3, "platform": 3, "should": [3, 5, 6, 17], "enough": 3, "curl": 3, "fssl": 3, "http": [3, 17], "sh": 3, "bash": 3, "In": [3, 17, 18, 21, 22], "latest": 3, "stage": [3, 17], "charg": 3, "creat": [3, 5, 9, 17], "python": 3, "environ": 3, "alreadi": 3, "setup": 3, "sever": [3, 8, 17], "you": [3, 8, 17], "depend": 3, "hardwar": 3, "your": 3, "dispos": 3, "cpu": [3, 5, 11, 12], "cross": [3, 8, 18], "e": [3, 5], "linux": 3, "maco": 3, "x86_64": 3, "arm64": 3, "mp": [3, 5], "m1": 3, "m2": 3, "m3": 3, "chip": 3, "cuda": [3, 5], "12": 3, "1": [3, 5, 7, 9, 10, 11, 12, 17], "machin": 3, "gpu": [3, 5], "make": [3, 17, 18], "experi": 3, "scalewai": 3, "instanc": [3, 5, 6, 7, 8, 9, 10, 11, 12], "l4": 3, "11": 3, "7": 3, "similar": [3, 9, 17, 18], "instead": 3, "note": [3, 8, 17], "can": [3, 5, 8, 10, 12, 17, 20], "modifi": 3, "toml": 3, "own": 3, "sinc": 3, "might": 3, "suit": 3, "need": [3, 18], "github": 3, "self": [3, 5, 6, 7, 8, 9, 10, 11, 12], "all": [3, 8], "necessari": 3, "file": 3, "recurs": 3, "wai": [3, 17, 20], "submodul": 3, "git": 3, "com": 3, "glemaitr": 3, "first": [3, 5, 17], "html": [3, 12], "gener": 3, "page": [3, 12], "run": [3, 11, 12, 17], "command": 3, "frozen": 3, "doc": 3, "have": [3, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18], "differ": [3, 17, 20], "each": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "type": [3, 17, 18], "more": [3, 17, 18, 20], "strategi": [3, 6, 19], "propos": [3, 17], "specif": [3, 20], "specifi": 3, "llm": [3, 6, 17, 19], "For": 3, "test": 3, "purpos": 3, "mistral": [3, 17], "7b": 3, "fetch": 3, "ar": [3, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 20], "Then": [3, 17, 18], "requir": 3, "which": [3, 17], "want": [3, 8, 17], "offload": 3, "access": 3, "address": 3, "127": 3, "0": [3, 7], "8123": 3, "when": [3, 17, 20], "discuss": 3, "earlier": 3, "edit": 3, "mode": 3, "howev": [3, 17], "via": 3, "pip": 3, "don": [3, 20], "t": [3, 20], "ani": [3, 18, 20], "better": [3, 17], "handl": 3, "modul": [4, 5, 16, 20], "ragger_duck": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16], "function": [4, 10, 12, 16, 20], "emb": [4, 5, 9], "transform": [4, 5, 7, 9, 10, 11, 12, 17, 18], "allow": [4, 6], "text": [4, 5, 7, 10, 11, 12, 17], "class": [5, 6, 7, 8, 9, 10, 11, 12, 20], "embed": [5, 7, 9, 13, 18], "model_name_or_path": 5, "none": [5, 7, 8, 9, 10, 11, 12], "devic": 5, "cache_fold": 5, "use_auth_token": 5, "batch_siz": 5, "32": 5, "show_progress_bar": 5, "true": [5, 6, 7, 8, 9, 10, 11, 12], "sentenc": [5, 9, 18], "thin": 5, "wrapper": 5, "around": 5, "sentence_transform": [5, 8], "thu": 5, "insid": 5, "pipelin": [5, 6, 7, 8, 9, 10, 11, 12], "paramet": [5, 6, 7, 8, 9, 10, 11, 12, 20], "str": [5, 7, 8, 9, 12], "default": [5, 6, 7, 8, 9, 10, 11, 12], "If": [5, 6, 7, 8, 9, 10, 11, 12, 18], "filepath": 5, "disc": 5, "load": 5, "model": [5, 6, 17, 18, 19], "from": [5, 7, 8, 9, 10, 11, 12, 17, 18, 19, 20], "path": [5, 10, 11, 12], "tri": 5, "download": [5, 18], "pre": [5, 18], "train": [5, 7, 8, 9, 17, 18], "fail": 5, "construct": 5, "huggingfac": [5, 18], "name": [5, 6, 7, 8, 9, 10, 11, 12, 18], "iter": 5, "nn": 5, "custom": 5, "scratch": 5, "g": 5, "comput": [5, 7, 8, 9], "store": 5, "bool": [5, 6, 7, 8, 9, 10, 11, 12], "authent": 5, "token": 5, "privat": 5, "int": [5, 7, 8, 9, 11, 12], "batch": 5, "size": [5, 11, 12, 20], "dure": [5, 17, 18], "whether": [5, 6, 8], "show": [5, 17], "progress": [5, 21, 22], "bar": 5, "method": [5, 6, 7, 8, 9, 10, 11, 12], "fit": [5, 7, 8, 9, 10, 11, 12], "x": [5, 7, 8, 9, 10, 11, 12, 17], "y": [5, 7, 8, 9, 10, 11, 12, 17], "No": [5, 10, 11, 12], "op": [5, 10, 11, 12], "oper": [5, 10, 11, 12], "onli": [5, 10, 11, 12, 20], "valid": [5, 10, 11, 12], "ignor": [5, 7, 8, 9, 10, 11, 12], "return": [5, 6, 7, 8, 9, 10, 11, 12, 17], "estim": [5, 6, 7, 8, 9, 10, 11, 12, 20], "fit_transform": [5, 10, 11, 12], "fit_param": [5, 10, 11, 12], "data": [5, 7, 8, 9, 10, 11, 12], "option": [5, 10, 11, 12], "arrai": [5, 10, 11, 12], "like": [5, 10, 11, 12], "shape": [5, 7, 9, 10, 11, 12], "n_sampl": [5, 10, 11, 12], "n_featur": [5, 7, 9, 10, 11, 12], "input": [5, 7, 8, 9, 10, 11, 12], "sampl": [5, 10, 11, 12], "n_output": [5, 10, 11, 12], "target": [5, 10, 11, 12], "valu": [5, 6, 7, 8, 9, 10, 11, 12], "unsupervis": [5, 10, 11, 12], "dict": [5, 6, 7, 8, 9, 10, 11, 12], "addit": [5, 10, 11, 12, 17], "x_new": [5, 10, 11, 12], "ndarrai": [5, 7, 9, 10, 11, 12], "n_features_new": [5, 10, 11, 12], "get_metadata_rout": [5, 6, 7, 8, 9, 10, 11, 12], "metadata": [5, 6, 7, 8, 9, 10, 11, 12], "rout": [5, 6, 7, 8, 9, 10, 11, 12], "object": [5, 6, 7, 8, 9, 10, 11, 12], "pleas": [5, 6, 7, 8, 9, 10, 11, 12], "how": [5, 6, 7, 8, 9, 10, 11, 12, 17], "mechan": [5, 6, 7, 8, 9, 10, 11, 12], "work": [5, 6, 7, 8, 9, 10, 11, 12], "metadatarequest": [5, 6, 7, 8, 9, 10, 11, 12], "A": [5, 6, 7, 8, 9, 10, 11, 12, 17], "encapsul": [5, 6, 7, 8, 9, 10, 11, 12], "get_param": [5, 6, 7, 8, 9, 10, 11, 12], "deep": [5, 6, 7, 8, 9, 10, 11, 12], "subobject": [5, 6, 7, 8, 9, 10, 11, 12], "param": [5, 6, 7, 8, 9, 10, 11, 12], "map": [5, 6, 7, 8, 9, 10, 11, 12], "set_output": [5, 10, 11, 12], "output": [5, 10, 11, 12], "sphx_glr_auto_examples_miscellaneous_plot_set_output": [5, 10, 11, 12], "py": [5, 10, 11, 12], "panda": [5, 10, 11, 12], "configur": [5, 10, 11, 12], "format": [5, 10, 11, 12], "datafram": [5, 10, 11, 12], "polar": [5, 10, 11, 12], "unchang": [5, 10, 11, 12], "4": [5, 10, 11, 12, 17], "wa": [5, 10, 11, 12], "ad": [5, 10, 11, 12], "set_param": [5, 6, 7, 8, 9, 10, 11, 12], "simpl": [5, 6, 7, 8, 9, 10, 11, 12, 17, 20], "well": [5, 6, 7, 8, 9, 10, 11, 12], "nest": [5, 6, 7, 8, 9, 10, 11, 12], "latter": [5, 6, 7, 8, 9, 10, 11, 12], "form": [5, 6, 7, 8, 9, 10, 11, 12], "compon": [5, 6, 7, 8, 9, 10, 11, 12, 17], "__": [5, 6, 7, 8, 9, 10, 11, 12], "so": [5, 6, 7, 8, 9, 10, 11, 12], "": [5, 6, 7, 8, 9, 10, 11, 12, 17], "possibl": [5, 6, 7, 8, 9, 10, 11, 12], "updat": [5, 6, 7, 8, 9, 10, 11, 12], "vector": [5, 7, 17, 18], "length": 5, "n_sentenc": [5, 7, 9], "singl": 5, "list": [5, 7, 8, 9, 10, 11, 12], "dictionari": [5, 10, 11, 12], "embedding_s": 5, "prompt": [6, 13, 17], "retriev": [6, 7, 8, 9, 13, 19], "use_retrieved_context": 6, "queri": [6, 7, 8, 9, 17, 18], "onc": [6, 19], "context": [6, 8, 17, 18, 19], "request": [6, 19], "languag": [6, 17, 19], "expect": [6, 8, 9, 17], "implement": [6, 8, 18, 19], "__call__": 6, "take": 6, "respons": 6, "It": [6, 10, 17, 20], "instruct": 6, "base": [6, 7, 8, 17], "befor": [6, 17], "count_vector": 7, "top_k": [7, 9], "b": 7, "75": 7, "k1": 7, "6": 7, "k": [7, 9], "nearest": [7, 9, 17, 18], "neighbor": [7, 9, 17, 18], "lexic": [7, 8, 17, 19], "search": [7, 17, 19], "bm25": 7, "count": [7, 17], "term": 7, "document": [7, 8, 9, 10, 11, 12, 13, 17, 18], "feature_extract": 7, "countvector": [7, 18], "number": [7, 8, 9, 11, 12], "attribut": [7, 9, 11, 12], "x_fit_": [7, 9], "x_embedded_": [7, 9], "vocabulari": [7, 8, 18], "idf": [7, 8, 18], "most": [7, 8, 9, 17], "relev": [7, 8, 9, 17], "cross_encod": 8, "min_top_k": 8, "max_top_k": 8, "threshold": 8, "drop_dupl": 8, "hybrid": 8, "semant": [8, 9, 17, 19], "encod": [8, 18], "rerank": [8, 17, 19], "accept": 8, "case": [8, 17, 18], "result": [8, 17, 18], "crossencod": 8, "minimum": 8, "less": 8, "than": 8, "maximum": 8, "float": 8, "filter": 8, "score": [8, 18], "drop": 8, "duplic": 8, "step": [8, 17], "done": [8, 17], "after": 8, "index": [9, 17], "faiss": [9, 18], "inner": [9, 17], "product": 9, "cosin": 9, "mean": [9, 17], "normal": 9, "scrape": [10, 11, 12, 13, 17], "extract": [10, 11, 12, 20], "numpydoc": [10, 20], "process": [10, 12], "templat": 10, "pathlib": [10, 11, 12], "chunk_siz": [11, 12], "300": [11, 12], "chunk_overlap": [11, 12], "50": [11, 12], "n_job": [11, 12], "chunk": [11, 12, 18, 20], "split": [11, 12], "overlap": [11, 12], "between": [11, 12, 17, 18], "two": [11, 12, 17, 18], "consecut": [11, 12], "job": [11, 12], "parallel": [11, 12], "core": [11, 12], "text_splitter_": [11, 12], "langchain": [11, 12], "text_splitt": [11, 12], "recursivecharactertextsplitt": [11, 12], "splitter": [11, 12], "folders_to_exclud": 12, "string": 12, "correspond": [12, 17], "exclud": 12, "full": 13, "packag": 13, "apinumpydocextractor": [13, 20], "galleryexampleextractor": 13, "userguidedocextractor": [13, 20], "sentencetransform": [13, 18], "bm25retriev": [13, 18], "semanticretriev": [13, 18], "retrieverrerank": [13, 18], "basicpromptingstrategi": [13, 19], "websit": [16, 20], "go": 17, "our": [17, 18], "framework": 17, "high": 17, "level": 17, "overview": 17, "main": 17, "let": 17, "defin": 17, "larg": [17, 19], "graphic": 17, "below": 17, "repres": 17, "interact": 17, "proof": 17, "poc": 17, "interest": 17, "zero": 17, "shot": 17, "formul": 17, "natur": 17, "through": 17, "gpt": 17, "openai": 17, "ii": 17, "local": 17, "open": 17, "weight": [17, 18], "llama": 17, "introduc": 17, "major": 17, "previou": 17, "consist": 17, "given": [17, 18], "therefor": 17, "condit": 17, "ha": 17, "been": 17, "extens": 17, "studi": 17, "past": 17, "relat": 17, "applic": 17, "engin": 17, "next": 17, "section": [17, 20], "explain": 17, "algorithm": [17, 18], "mathemat": 17, "represent": 17, "databas": [17, 18], "capabl": 17, "find": 17, "phase": 17, "These": 17, "popul": 17, "pass": 17, "found": 17, "those": 17, "coupl": 17, "reader": 17, "comprehens": 17, "review": 17, "without": [17, 20], "distinguish": 17, "bag": 17, "word": 17, "bow": 17, "neural": 17, "network": 17, "thei": [17, 20], "lack": 17, "abil": 17, "captur": 17, "approach": 17, "improv": 17, "perform": 17, "expand": 17, "topic": 17, "spars": 17, "leverag": 17, "invert": 17, "continu": 17, "space": [17, 18], "while": [17, 20], "complex": 17, "due": 17, "dens": 17, "approxim": [17, 18], "guo": 17, "j": 17, "cai": 17, "fan": 17, "sun": 17, "f": 17, "zhang": 17, "r": 17, "cheng": 17, "2022": 17, "acm": 17, "transact": 17, "system": 17, "toi": 17, "40": 17, "42": 17, "arxiv": 17, "org": 17, "ab": 17, "2103": 17, "04831": 17, "present": 17, "behind": 17, "devil": 17, "import": 17, "meaning": [17, 20], "merg": 17, "differenti": 18, "exact": 18, "match": 18, "tf": 18, "scheme": 18, "seen": 18, "flexibl": 18, "closest": 18, "As": 18, "both": 18, "microsoft": 18, "bing": 18, "pair": 18, "advanc": 20, "scraper": 20, "inde": 20, "pars": 20, "control": 20, "alwai": 20, "hope": 20, "remov": 20, "ambigu": 20, "could": 20, "exist": 20, "webpag": 20, "addition": 20}, "objects": {"ragger_duck": [[4, 0, 0, "-", "embedding"], [14, 0, 0, "-", "prompt"], [15, 0, 0, "-", "retrieval"], [16, 0, 0, "-", "scraping"]], "ragger_duck.embedding": [[5, 1, 1, "", "SentenceTransformer"]], "ragger_duck.embedding.SentenceTransformer": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "fit_transform"], [5, 2, 1, "", "get_metadata_routing"], [5, 2, 1, "", "get_params"], [5, 2, 1, "", "set_output"], [5, 2, 1, "", "set_params"], [5, 2, 1, "", "transform"]], "ragger_duck.prompt": [[6, 1, 1, "", "BasicPromptingStrategy"]], "ragger_duck.prompt.BasicPromptingStrategy": [[6, 2, 1, "", "get_metadata_routing"], [6, 2, 1, "", "get_params"], [6, 2, 1, "", "set_params"]], "ragger_duck.retrieval": [[7, 1, 1, "", "BM25Retriever"], [8, 1, 1, "", "RetrieverReranker"], [9, 1, 1, "", "SemanticRetriever"]], "ragger_duck.retrieval.BM25Retriever": [[7, 2, 1, "", "fit"], [7, 2, 1, "", "get_metadata_routing"], [7, 2, 1, "", "get_params"], [7, 2, 1, "", "query"], [7, 2, 1, "", "set_params"]], "ragger_duck.retrieval.RetrieverReranker": [[8, 2, 1, "", "fit"], [8, 2, 1, "", "get_metadata_routing"], [8, 2, 1, "", "get_params"], [8, 2, 1, "", "query"], [8, 2, 1, "", "set_params"]], "ragger_duck.retrieval.SemanticRetriever": [[9, 2, 1, "", "fit"], [9, 2, 1, "", "get_metadata_routing"], [9, 2, 1, "", "get_params"], [9, 2, 1, "", "query"], [9, 2, 1, "", "set_params"]], "ragger_duck.scraping": [[10, 1, 1, "", "APINumPyDocExtractor"], [11, 1, 1, "", "GalleryExampleExtractor"], [12, 1, 1, "", "UserGuideDocExtractor"]], "ragger_duck.scraping.APINumPyDocExtractor": [[10, 2, 1, "", "fit"], [10, 2, 1, "", "fit_transform"], [10, 2, 1, "", "get_metadata_routing"], [10, 2, 1, "", "get_params"], [10, 2, 1, "", "set_output"], [10, 2, 1, "", "set_params"], [10, 2, 1, "", "transform"]], "ragger_duck.scraping.GalleryExampleExtractor": [[11, 2, 1, "", "fit"], [11, 2, 1, "", "fit_transform"], [11, 2, 1, "", "get_metadata_routing"], [11, 2, 1, "", "get_params"], [11, 2, 1, "", "set_output"], [11, 2, 1, "", "set_params"], [11, 2, 1, "", "transform"]], "ragger_duck.scraping.UserGuideDocExtractor": [[12, 2, 1, "", "fit"], [12, 2, 1, "", "fit_transform"], [12, 2, 1, "", "get_metadata_routing"], [12, 2, 1, "", "get_params"], [12, 2, 1, "", "set_output"], [12, 2, 1, "", "set_params"], [12, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"]}, "titleterms": {"about": 0, "u": 0, "exampl": 1, "ragger": [2, 3], "duck": [2, 3], "document": [2, 3, 16, 19, 20], "get": 3, "start": 3, "deploi": 3, "clone": 3, "project": 3, "build": 3, "scikit": 3, "learn": 3, "train": 3, "semant": [3, 18], "lexic": [3, 18], "retriev": [3, 15, 17, 18], "download": 3, "larg": 3, "languag": 3, "model": 3, "launch": 3, "web": 3, "consol": 3, "us": 3, "librari": 3, "embed": 4, "sentencetransform": 5, "basicpromptingstrategi": 6, "bm25retriev": 7, "retrieverrerank": 8, "semanticretriev": 9, "apinumpydocextractor": 10, "galleryexampleextractor": 11, "userguidedocextractor": 12, "api": [13, 19, 20], "refer": [13, 17], "prompt": [14, 19], "scrape": [16, 20], "user": [17, 20], "guid": [17, 20], "what": 17, "i": 17, "augment": 17, "gener": 17, "inform": 17, "concept": 17, "detail": 17, "regard": 17, "implement": 17, "rerank": 18, "merg": 18, "text": 20, "releas": 21, "histori": 21, "version": [21, 22], "0": [21, 22], "1": [21, 22], "changelog": [21, 22]}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"About us": [[0, "about-us"]], "Examples": [[1, "examples"]], "Ragger Duck documentation": [[2, "ragger-duck-documentation"]], "Getting Started": [[3, "getting-started"]], "Deploy Ragger Duck": [[3, "deploy-ragger-duck"]], "Cloning the project": [[3, "cloning-the-project"]], "Build the scikit-learn documentation": [[3, "build-the-scikit-learn-documentation"]], "Train the semantic and lexical retrievers": [[3, "train-the-semantic-and-lexical-retrievers"]], "Download the Large Language Model": [[3, "download-the-large-language-model"]], "Launch the Web Console": [[3, "launch-the-web-console"]], "Use the Ragger Duck library": [[3, "use-the-ragger-duck-library"]], "Embedding": [[4, "module-ragger_duck.embedding"]], "SentenceTransformer": [[5, "sentencetransformer"]], "BasicPromptingStrategy": [[6, "basicpromptingstrategy"]], "BM25Retriever": [[7, "bm25retriever"]], "RetrieverReranker": [[8, "retrieverreranker"]], "SemanticRetriever": [[9, "semanticretriever"]], "APINumPyDocExtractor": [[10, "apinumpydocextractor"]], "GalleryExampleExtractor": [[11, "galleryexampleextractor"]], "UserGuideDocExtractor": [[12, "userguidedocextractor"]], "API reference": [[13, "api-reference"]], "Prompt": [[14, "module-ragger_duck.prompt"]], "Retrieval": [[15, "module-ragger_duck.retrieval"]], "Scraping the documentation": [[16, "module-ragger_duck.scraping"]], "User Guide": [[17, "user-guide"]], "What is Retrieval Augmented Generation?": [[17, "what-is-retrieval-augmented-generation"]], "Information retrieval": [[17, "information-retrieval"]], "Concepts": [[17, "concepts"]], "Details regarding the retrievers": [[17, "details-regarding-the-retrievers"]], "References": [[17, "references"]], "Implementation details": [[17, "implementation-details"]], "Retriever": [[18, "retriever"]], "Lexical retrievers": [[18, "lexical-retrievers"]], "Semantical retrievers": [[18, "semantical-retrievers"]], "Reranker: merging lexical and semantical retrievers": [[18, "reranker-merging-lexical-and-semantical-retrievers"]], "Prompting": [[19, "prompting"]], "Prompting for API documentation": [[19, "prompting-for-api-documentation"]], "Text Scraping": [[20, "text-scraping"]], "API documentation": [[20, "api-documentation"]], "User Guide documentation": [[20, "user-guide-documentation"]], "Release history": [[21, "release-history"]], "Version 0.1": [[21, "version-0-1"], [22, "version-0-1"]], "Changelog": [[21, "changelog"], [22, "changelog"]]}, "indexentries": {"module": [[4, "module-ragger_duck.embedding"], [14, "module-ragger_duck.prompt"], [15, "module-ragger_duck.retrieval"], [16, "module-ragger_duck.scraping"]], "ragger_duck.embedding": [[4, "module-ragger_duck.embedding"]], "sentencetransformer (class in ragger_duck.embedding)": [[5, "ragger_duck.embedding.SentenceTransformer"]], "fit() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit"]], "fit_transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit_transform"]], "get_metadata_routing() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_metadata_routing"]], "get_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_params"]], "set_output() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_output"]], "set_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_params"]], "transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.transform"]], "basicpromptingstrategy (class in ragger_duck.prompt)": [[6, "ragger_duck.prompt.BasicPromptingStrategy"]], "get_metadata_routing() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_metadata_routing"]], "get_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_params"]], "set_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.set_params"]], "bm25retriever (class in ragger_duck.retrieval)": [[7, "ragger_duck.retrieval.BM25Retriever"]], "fit() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_params"]], "query() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.query"]], "set_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.set_params"]], "retrieverreranker (class in ragger_duck.retrieval)": [[8, "ragger_duck.retrieval.RetrieverReranker"]], "fit() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.fit"]], "get_metadata_routing() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_params"]], "query() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.query"]], "set_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.set_params"]], "semanticretriever (class in ragger_duck.retrieval)": [[9, "ragger_duck.retrieval.SemanticRetriever"]], "fit() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_params"]], "query() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.query"]], "set_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.set_params"]], "apinumpydocextractor (class in ragger_duck.scraping)": [[10, "ragger_duck.scraping.APINumPyDocExtractor"]], "fit() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_params"]], "transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.transform"]], "galleryexampleextractor (class in ragger_duck.scraping)": [[11, "ragger_duck.scraping.GalleryExampleExtractor"]], "fit() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit"]], "fit_transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_params"]], "set_output() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_output"]], "set_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_params"]], "transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.transform"]], "userguidedocextractor (class in ragger_duck.scraping)": [[12, "ragger_duck.scraping.UserGuideDocExtractor"]], "fit() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_params"]], "transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.transform"]], "ragger_duck.prompt": [[14, "module-ragger_duck.prompt"]], "ragger_duck.retrieval": [[15, "module-ragger_duck.retrieval"]], "ragger_duck.scraping": [[16, "module-ragger_duck.scraping"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["about", "auto_examples/index", "index", "install", "references/embedding", "references/generated/ragger_duck.embedding.SentenceTransformer", "references/generated/ragger_duck.prompt.BasicPromptingStrategy", "references/generated/ragger_duck.retrieval.BM25Retriever", "references/generated/ragger_duck.retrieval.RetrieverReranker", "references/generated/ragger_duck.retrieval.SemanticRetriever", "references/generated/ragger_duck.scraping.APINumPyDocExtractor", "references/generated/ragger_duck.scraping.GalleryExampleExtractor", "references/generated/ragger_duck.scraping.UserGuideDocExtractor", "references/index", "references/prompt", "references/retrieval", "references/scraping", "user_guide/index", "user_guide/information_retrieval", "user_guide/large_language_model", "user_guide/text_scraping", "whats_new", "whats_new/v0.1"], "filenames": ["about.rst", "auto_examples/index.rst", "index.rst", "install.rst", "references/embedding.rst", "references/generated/ragger_duck.embedding.SentenceTransformer.rst", "references/generated/ragger_duck.prompt.BasicPromptingStrategy.rst", "references/generated/ragger_duck.retrieval.BM25Retriever.rst", "references/generated/ragger_duck.retrieval.RetrieverReranker.rst", "references/generated/ragger_duck.retrieval.SemanticRetriever.rst", "references/generated/ragger_duck.scraping.APINumPyDocExtractor.rst", "references/generated/ragger_duck.scraping.GalleryExampleExtractor.rst", "references/generated/ragger_duck.scraping.UserGuideDocExtractor.rst", "references/index.rst", "references/prompt.rst", "references/retrieval.rst", "references/scraping.rst", "user_guide/index.rst", "user_guide/information_retrieval.rst", "user_guide/large_language_model.rst", "user_guide/text_scraping.rst", "whats_new.rst", "whats_new/v0.1.rst"], "titles": ["About us", "Examples", "Ragger Duck documentation", "Getting Started", "Embedding", "SentenceTransformer", "BasicPromptingStrategy", "BM25Retriever", "RetrieverReranker", "SemanticRetriever", "APINumPyDocExtractor", "GalleryExampleExtractor", "UserGuideDocExtractor", "API reference", "Prompt", "Retrieval", "Scraping the documentation", "User guide", "Retriever", "Prompting", "Text Scraping", "Release history", "Version 0.1"], "terms": {"thi": [0, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 20], "i": [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 13, 18, 19, 20], "sundai": 0, "afternoon": 0, "project": [0, 2, 17, 18], "One": 1, "dai": 1, "write": 1, "some": [1, 2, 3, 17, 20], "right": [1, 8], "now": [1, 3, 17], "someth": 1, "locat": 1, "script": 1, "folder": [1, 10, 11, 12], "date": 2, "apr": 2, "19": 2, "2024": 2, "version": [2, 3, 5, 10, 11, 12], "us": [2, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 20], "link": [2, 3], "sourc": [2, 3, 10, 11, 12, 17], "repositori": [2, 3, 5], "issu": 2, "idea": [2, 17, 18], "sklearn": [2, 3, 7], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 20], "provid": [2, 3, 17, 18, 20], "set": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 17], "tool": 2, "build": [2, 9, 17, 18, 20], "rag": [2, 3, 17], "answer": [2, 6, 17, 19], "question": [2, 17, 19], "about": 2, "scikit": [2, 5, 9, 16], "learn": [2, 5, 9, 16], "librari": [2, 9], "get": [2, 5, 6, 7, 8, 9, 10, 11, 12, 17, 19], "start": 2, "check": [2, 5, 6, 7, 8, 9, 10, 11, 12], "out": 2, "guid": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "instal": [2, 3], "extra": 2, "inform": [2, 5, 6, 7, 8, 9, 10, 11, 12, 20], "new": [2, 5, 10, 11, 12, 17], "contribut": 2, "also": [2, 3, 17], "To": [2, 3], "guidelin": 2, "user": [2, 3, 5, 6, 7, 8, 9, 10, 11, 12], "depth": [2, 17], "kei": [2, 5], "concept": 2, "background": 2, "explan": 2, "api": [2, 3, 5, 9, 10, 11, 12, 17], "refer": [2, 3, 20], "contain": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 20], "detail": [2, 3], "descript": 2, "avail": [2, 18], "exampl": [2, 3, 5, 10, 11, 12], "galleri": [2, 3, 11], "good": 2, "place": 2, "see": [2, 5, 10, 11, 12], "action": 2, "select": [2, 3], "an": [2, 3, 5, 6, 9, 10, 11, 12, 17, 18], "dive": 2, "eas": 3, "deploy": 3, "we": [3, 6, 8, 9, 17, 18, 19, 20], "reli": 3, "pixi": 3, "follow": [3, 5, 8, 9, 17], "short": 3, "current": 3, "support": 3, "platform": 3, "should": [3, 5, 6, 17], "enough": 3, "curl": 3, "fssl": 3, "http": [3, 17], "sh": 3, "bash": 3, "In": [3, 17, 18, 21, 22], "latest": 3, "stage": [3, 17], "charg": 3, "creat": [3, 5, 9, 17], "python": 3, "environ": 3, "alreadi": 3, "setup": 3, "sever": [3, 8, 17], "you": [3, 8, 17], "depend": 3, "hardwar": 3, "your": 3, "dispos": 3, "cpu": [3, 5, 11, 12], "cross": [3, 8, 18], "e": [3, 5], "linux": 3, "maco": 3, "x86_64": 3, "arm64": 3, "mp": [3, 5], "m1": 3, "m2": 3, "m3": 3, "chip": 3, "cuda": [3, 5], "12": 3, "1": [3, 5, 7, 9, 10, 11, 12, 17], "machin": 3, "gpu": [3, 5], "make": [3, 17, 18], "experi": 3, "scalewai": 3, "instanc": [3, 5, 6, 7, 8, 9, 10, 11, 12], "l4": 3, "11": 3, "7": 3, "similar": [3, 9, 17, 18], "instead": 3, "note": [3, 8, 17], "can": [3, 5, 8, 10, 12, 17, 20], "modifi": 3, "toml": 3, "own": 3, "sinc": 3, "might": 3, "suit": 3, "need": [3, 18], "github": 3, "self": [3, 5, 6, 7, 8, 9, 10, 11, 12], "all": [3, 8], "necessari": 3, "file": 3, "recurs": 3, "wai": [3, 17, 20], "submodul": 3, "git": 3, "com": 3, "glemaitr": 3, "first": [3, 5, 17], "html": [3, 12], "gener": 3, "page": [3, 12], "run": [3, 11, 12, 17], "command": 3, "frozen": 3, "doc": 3, "have": [3, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18], "differ": [3, 17, 20], "each": [3, 5, 6, 7, 8, 9, 10, 11, 12, 18], "type": [3, 17, 18], "more": [3, 17, 18, 20], "strategi": [3, 6, 19], "propos": [3, 17], "specif": [3, 20], "specifi": 3, "llm": [3, 6, 17, 19], "For": 3, "test": 3, "purpos": 3, "mistral": [3, 17], "7b": 3, "fetch": 3, "ar": [3, 5, 6, 7, 8, 9, 10, 11, 12, 17, 18, 20], "Then": [3, 17, 18], "requir": 3, "which": [3, 17], "want": [3, 8, 17], "offload": 3, "access": 3, "address": 3, "127": 3, "0": [3, 7], "8123": 3, "when": [3, 17, 20], "discuss": 3, "earlier": 3, "edit": 3, "mode": 3, "howev": [3, 17], "via": 3, "pip": 3, "don": [3, 20], "t": [3, 20], "ani": [3, 18, 20], "better": [3, 17], "handl": 3, "modul": [4, 5, 16, 20], "ragger_duck": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16], "function": [4, 10, 12, 16, 20], "emb": [4, 5, 9], "transform": [4, 5, 7, 9, 10, 11, 12, 17, 18], "allow": [4, 6], "text": [4, 5, 7, 10, 11, 12, 17], "class": [5, 6, 7, 8, 9, 10, 11, 12, 20], "embed": [5, 7, 9, 13, 18], "model_name_or_path": 5, "none": [5, 7, 8, 9, 10, 11, 12], "devic": 5, "cache_fold": 5, "use_auth_token": 5, "batch_siz": 5, "32": 5, "show_progress_bar": 5, "true": [5, 6, 7, 8, 9, 10, 11, 12], "sentenc": [5, 9, 18], "thin": 5, "wrapper": 5, "around": 5, "sentence_transform": [5, 8], "thu": 5, "insid": 5, "pipelin": [5, 6, 7, 8, 9, 10, 11, 12], "paramet": [5, 6, 7, 8, 9, 10, 11, 12, 20], "str": [5, 7, 8, 9, 12], "default": [5, 6, 7, 8, 9, 10, 11, 12], "If": [5, 6, 7, 8, 9, 10, 11, 12, 18], "filepath": 5, "disc": 5, "load": 5, "model": [5, 6, 17, 18, 19], "from": [5, 7, 8, 9, 10, 11, 12, 17, 18, 19, 20], "path": [5, 10, 11, 12], "tri": 5, "download": [5, 18], "pre": [5, 18], "train": [5, 7, 8, 9, 17, 18], "fail": 5, "construct": 5, "huggingfac": [5, 18], "name": [5, 6, 7, 8, 9, 10, 11, 12, 18], "iter": 5, "nn": 5, "custom": 5, "scratch": 5, "g": 5, "comput": [5, 7, 8, 9], "store": 5, "bool": [5, 6, 7, 8, 9, 10, 11, 12], "authent": 5, "token": 5, "privat": 5, "int": [5, 7, 8, 9, 11, 12], "batch": 5, "size": [5, 11, 12, 20], "dure": [5, 17, 18], "whether": [5, 6, 8], "show": [5, 17], "progress": [5, 21, 22], "bar": 5, "method": [5, 6, 7, 8, 9, 10, 11, 12], "fit": [5, 7, 8, 9, 10, 11, 12], "x": [5, 7, 8, 9, 10, 11, 12, 17], "y": [5, 7, 8, 9, 10, 11, 12, 17], "No": [5, 10, 11, 12], "op": [5, 10, 11, 12], "oper": [5, 10, 11, 12], "onli": [5, 10, 11, 12, 20], "valid": [5, 10, 11, 12], "ignor": [5, 7, 8, 9, 10, 11, 12], "return": [5, 6, 7, 8, 9, 10, 11, 12, 17], "estim": [5, 6, 7, 8, 9, 10, 11, 12, 20], "fit_transform": [5, 10, 11, 12], "fit_param": [5, 10, 11, 12], "data": [5, 7, 8, 9, 10, 11, 12], "option": [5, 10, 11, 12], "arrai": [5, 10, 11, 12], "like": [5, 10, 11, 12], "shape": [5, 7, 9, 10, 11, 12], "n_sampl": [5, 10, 11, 12], "n_featur": [5, 7, 9, 10, 11, 12], "input": [5, 7, 8, 9, 10, 11, 12], "sampl": [5, 10, 11, 12], "n_output": [5, 10, 11, 12], "target": [5, 10, 11, 12], "valu": [5, 6, 7, 8, 9, 10, 11, 12], "unsupervis": [5, 10, 11, 12], "dict": [5, 6, 7, 8, 9, 10, 11, 12], "addit": [5, 10, 11, 12, 17], "x_new": [5, 10, 11, 12], "ndarrai": [5, 7, 9, 10, 11, 12], "n_features_new": [5, 10, 11, 12], "get_metadata_rout": [5, 6, 7, 8, 9, 10, 11, 12], "metadata": [5, 6, 7, 8, 9, 10, 11, 12], "rout": [5, 6, 7, 8, 9, 10, 11, 12], "object": [5, 6, 7, 8, 9, 10, 11, 12], "pleas": [5, 6, 7, 8, 9, 10, 11, 12], "how": [5, 6, 7, 8, 9, 10, 11, 12, 17], "mechan": [5, 6, 7, 8, 9, 10, 11, 12], "work": [5, 6, 7, 8, 9, 10, 11, 12], "metadatarequest": [5, 6, 7, 8, 9, 10, 11, 12], "A": [5, 6, 7, 8, 9, 10, 11, 12, 17], "encapsul": [5, 6, 7, 8, 9, 10, 11, 12], "get_param": [5, 6, 7, 8, 9, 10, 11, 12], "deep": [5, 6, 7, 8, 9, 10, 11, 12], "subobject": [5, 6, 7, 8, 9, 10, 11, 12], "param": [5, 6, 7, 8, 9, 10, 11, 12], "map": [5, 6, 7, 8, 9, 10, 11, 12], "set_output": [5, 10, 11, 12], "output": [5, 10, 11, 12], "sphx_glr_auto_examples_miscellaneous_plot_set_output": [5, 10, 11, 12], "py": [5, 10, 11, 12], "panda": [5, 10, 11, 12], "configur": [5, 10, 11, 12], "format": [5, 10, 11, 12], "datafram": [5, 10, 11, 12], "polar": [5, 10, 11, 12], "unchang": [5, 10, 11, 12], "4": [5, 10, 11, 12, 17], "wa": [5, 10, 11, 12], "ad": [5, 10, 11, 12], "set_param": [5, 6, 7, 8, 9, 10, 11, 12], "simpl": [5, 6, 7, 8, 9, 10, 11, 12, 17, 20], "well": [5, 6, 7, 8, 9, 10, 11, 12], "nest": [5, 6, 7, 8, 9, 10, 11, 12], "latter": [5, 6, 7, 8, 9, 10, 11, 12], "form": [5, 6, 7, 8, 9, 10, 11, 12], "compon": [5, 6, 7, 8, 9, 10, 11, 12, 17], "__": [5, 6, 7, 8, 9, 10, 11, 12], "so": [5, 6, 7, 8, 9, 10, 11, 12], "": [5, 6, 7, 8, 9, 10, 11, 12, 17], "possibl": [5, 6, 7, 8, 9, 10, 11, 12], "updat": [5, 6, 7, 8, 9, 10, 11, 12], "vector": [5, 7, 17, 18], "length": 5, "n_sentenc": [5, 7, 9], "singl": 5, "list": [5, 7, 8, 9, 10, 11, 12], "dictionari": [5, 10, 11, 12], "embedding_s": 5, "prompt": [6, 13, 17], "retriev": [6, 7, 8, 9, 13, 19], "use_retrieved_context": 6, "queri": [6, 7, 8, 9, 17, 18], "onc": [6, 19], "context": [6, 8, 17, 18, 19], "request": [6, 19], "languag": [6, 17, 19], "expect": [6, 8, 9, 17], "implement": [6, 8, 18, 19], "__call__": 6, "take": 6, "respons": 6, "It": [6, 10, 17, 20], "instruct": 6, "base": [6, 7, 8, 17], "befor": [6, 17], "count_vector": 7, "top_k": [7, 9], "b": 7, "75": 7, "k1": 7, "6": 7, "k": [7, 9], "nearest": [7, 9, 17, 18], "neighbor": [7, 9, 17, 18], "lexic": [7, 8, 17, 19], "search": [7, 17, 19], "bm25": 7, "count": [7, 17], "term": 7, "document": [7, 8, 9, 10, 11, 12, 13, 17, 18], "feature_extract": 7, "countvector": [7, 18], "number": [7, 8, 9, 11, 12], "attribut": [7, 9, 11, 12], "x_fit_": [7, 9], "x_embedded_": [7, 9], "vocabulari": [7, 8, 18], "idf": [7, 8, 18], "most": [7, 8, 9, 17], "relev": [7, 8, 9, 17], "cross_encod": 8, "min_top_k": 8, "max_top_k": 8, "threshold": 8, "drop_dupl": 8, "hybrid": 8, "semant": [8, 9, 17, 19], "encod": [8, 18], "rerank": [8, 17, 19], "accept": 8, "case": [8, 17, 18], "result": [8, 17, 18], "crossencod": 8, "minimum": 8, "less": 8, "than": 8, "maximum": 8, "float": 8, "filter": 8, "score": [8, 18], "drop": 8, "duplic": 8, "step": [8, 17], "done": [8, 17], "after": 8, "index": [9, 17], "faiss": [9, 18], "inner": [9, 17], "product": 9, "cosin": 9, "mean": [9, 17], "normal": 9, "scrape": [10, 11, 12, 13, 17], "extract": [10, 11, 12, 20], "numpydoc": [10, 20], "process": [10, 12], "templat": 10, "pathlib": [10, 11, 12], "chunk_siz": [11, 12], "300": [11, 12], "chunk_overlap": [11, 12], "50": [11, 12], "n_job": [11, 12], "chunk": [11, 12, 18, 20], "split": [11, 12], "overlap": [11, 12], "between": [11, 12, 17, 18], "two": [11, 12, 17, 18], "consecut": [11, 12], "job": [11, 12], "parallel": [11, 12], "core": [11, 12], "text_splitter_": [11, 12], "langchain": [11, 12], "text_splitt": [11, 12], "recursivecharactertextsplitt": [11, 12], "splitter": [11, 12], "folders_to_exclud": 12, "string": 12, "correspond": [12, 17], "exclud": 12, "full": 13, "packag": 13, "apinumpydocextractor": [13, 20], "galleryexampleextractor": 13, "userguidedocextractor": [13, 20], "sentencetransform": [13, 18], "bm25retriev": [13, 18], "semanticretriev": [13, 18], "retrieverrerank": [13, 18], "basicpromptingstrategi": [13, 19], "websit": [16, 20], "go": 17, "our": [17, 18], "framework": 17, "high": 17, "level": 17, "overview": 17, "main": 17, "let": 17, "defin": 17, "larg": [17, 19], "graphic": 17, "below": 17, "repres": 17, "interact": 17, "proof": 17, "poc": 17, "interest": 17, "zero": 17, "shot": 17, "formul": 17, "natur": 17, "through": 17, "gpt": 17, "openai": 17, "ii": 17, "local": 17, "open": 17, "weight": [17, 18], "llama": 17, "introduc": 17, "major": 17, "previou": 17, "consist": 17, "given": [17, 18], "therefor": 17, "condit": 17, "ha": 17, "been": 17, "extens": 17, "studi": 17, "past": 17, "relat": 17, "applic": 17, "engin": 17, "next": 17, "section": [17, 20], "explain": 17, "algorithm": [17, 18], "mathemat": 17, "represent": 17, "databas": [17, 18], "capabl": 17, "find": 17, "phase": 17, "These": 17, "popul": 17, "pass": 17, "found": 17, "those": 17, "coupl": 17, "reader": 17, "comprehens": 17, "review": 17, "without": [17, 20], "distinguish": 17, "bag": 17, "word": 17, "bow": 17, "neural": 17, "network": 17, "thei": [17, 20], "lack": 17, "abil": 17, "captur": 17, "approach": 17, "improv": 17, "perform": 17, "expand": 17, "topic": 17, "spars": 17, "leverag": 17, "invert": 17, "continu": 17, "space": [17, 18], "while": [17, 20], "complex": 17, "due": 17, "dens": 17, "approxim": [17, 18], "guo": 17, "j": 17, "cai": 17, "fan": 17, "sun": 17, "f": 17, "zhang": 17, "r": 17, "cheng": 17, "2022": 17, "acm": 17, "transact": 17, "system": 17, "toi": 17, "40": 17, "42": 17, "arxiv": 17, "org": 17, "ab": 17, "2103": 17, "04831": 17, "present": 17, "behind": 17, "devil": 17, "import": 17, "meaning": [17, 20], "merg": 17, "differenti": 18, "exact": 18, "match": 18, "tf": 18, "scheme": 18, "seen": 18, "flexibl": 18, "closest": 18, "As": 18, "both": 18, "microsoft": 18, "bing": 18, "pair": 18, "advanc": 20, "scraper": 20, "inde": 20, "pars": 20, "control": 20, "alwai": 20, "hope": 20, "remov": 20, "ambigu": 20, "could": 20, "exist": 20, "webpag": 20, "addition": 20}, "objects": {"ragger_duck": [[4, 0, 0, "-", "embedding"], [14, 0, 0, "-", "prompt"], [15, 0, 0, "-", "retrieval"], [16, 0, 0, "-", "scraping"]], "ragger_duck.embedding": [[5, 1, 1, "", "SentenceTransformer"]], "ragger_duck.embedding.SentenceTransformer": [[5, 2, 1, "", "fit"], [5, 2, 1, "", "fit_transform"], [5, 2, 1, "", "get_metadata_routing"], [5, 2, 1, "", "get_params"], [5, 2, 1, "", "set_output"], [5, 2, 1, "", "set_params"], [5, 2, 1, "", "transform"]], "ragger_duck.prompt": [[6, 1, 1, "", "BasicPromptingStrategy"]], "ragger_duck.prompt.BasicPromptingStrategy": [[6, 2, 1, "", "get_metadata_routing"], [6, 2, 1, "", "get_params"], [6, 2, 1, "", "set_params"]], "ragger_duck.retrieval": [[7, 1, 1, "", "BM25Retriever"], [8, 1, 1, "", "RetrieverReranker"], [9, 1, 1, "", "SemanticRetriever"]], "ragger_duck.retrieval.BM25Retriever": [[7, 2, 1, "", "fit"], [7, 2, 1, "", "get_metadata_routing"], [7, 2, 1, "", "get_params"], [7, 2, 1, "", "query"], [7, 2, 1, "", "set_params"]], "ragger_duck.retrieval.RetrieverReranker": [[8, 2, 1, "", "fit"], [8, 2, 1, "", "get_metadata_routing"], [8, 2, 1, "", "get_params"], [8, 2, 1, "", "query"], [8, 2, 1, "", "set_params"]], "ragger_duck.retrieval.SemanticRetriever": [[9, 2, 1, "", "fit"], [9, 2, 1, "", "get_metadata_routing"], [9, 2, 1, "", "get_params"], [9, 2, 1, "", "query"], [9, 2, 1, "", "set_params"]], "ragger_duck.scraping": [[10, 1, 1, "", "APINumPyDocExtractor"], [11, 1, 1, "", "GalleryExampleExtractor"], [12, 1, 1, "", "UserGuideDocExtractor"]], "ragger_duck.scraping.APINumPyDocExtractor": [[10, 2, 1, "", "fit"], [10, 2, 1, "", "fit_transform"], [10, 2, 1, "", "get_metadata_routing"], [10, 2, 1, "", "get_params"], [10, 2, 1, "", "set_output"], [10, 2, 1, "", "set_params"], [10, 2, 1, "", "transform"]], "ragger_duck.scraping.GalleryExampleExtractor": [[11, 2, 1, "", "fit"], [11, 2, 1, "", "fit_transform"], [11, 2, 1, "", "get_metadata_routing"], [11, 2, 1, "", "get_params"], [11, 2, 1, "", "set_output"], [11, 2, 1, "", "set_params"], [11, 2, 1, "", "transform"]], "ragger_duck.scraping.UserGuideDocExtractor": [[12, 2, 1, "", "fit"], [12, 2, 1, "", "fit_transform"], [12, 2, 1, "", "get_metadata_routing"], [12, 2, 1, "", "get_params"], [12, 2, 1, "", "set_output"], [12, 2, 1, "", "set_params"], [12, 2, 1, "", "transform"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"]}, "titleterms": {"about": 0, "u": 0, "exampl": 1, "ragger": [2, 3], "duck": [2, 3], "document": [2, 3, 16, 19, 20], "get": 3, "start": 3, "deploi": 3, "clone": 3, "project": 3, "build": 3, "scikit": 3, "learn": 3, "train": 3, "semant": [3, 18], "lexic": [3, 18], "retriev": [3, 15, 17, 18], "download": 3, "larg": 3, "languag": 3, "model": 3, "launch": 3, "web": 3, "consol": 3, "us": 3, "librari": 3, "embed": 4, "sentencetransform": 5, "basicpromptingstrategi": 6, "bm25retriev": 7, "retrieverrerank": 8, "semanticretriev": 9, "apinumpydocextractor": 10, "galleryexampleextractor": 11, "userguidedocextractor": 12, "api": [13, 19, 20], "refer": [13, 17], "prompt": [14, 19], "scrape": [16, 20], "user": [17, 20], "guid": [17, 20], "what": 17, "i": 17, "augment": 17, "gener": 17, "inform": 17, "concept": 17, "detail": 17, "regard": 17, "implement": 17, "rerank": 18, "merg": 18, "text": 20, "releas": 21, "histori": 21, "version": [21, 22], "0": [21, 22], "1": [21, 22], "changelog": [21, 22]}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"About us": [[0, "about-us"]], "Examples": [[1, "examples"]], "Ragger Duck documentation": [[2, "ragger-duck-documentation"]], "Getting Started": [[3, "getting-started"]], "Deploy Ragger Duck": [[3, "deploy-ragger-duck"]], "Cloning the project": [[3, "cloning-the-project"]], "Build the scikit-learn documentation": [[3, "build-the-scikit-learn-documentation"]], "Train the semantic and lexical retrievers": [[3, "train-the-semantic-and-lexical-retrievers"]], "Download the Large Language Model": [[3, "download-the-large-language-model"]], "Launch the Web Console": [[3, "launch-the-web-console"]], "Use the Ragger Duck library": [[3, "use-the-ragger-duck-library"]], "Embedding": [[4, "module-ragger_duck.embedding"]], "SentenceTransformer": [[5, "sentencetransformer"]], "BasicPromptingStrategy": [[6, "basicpromptingstrategy"]], "BM25Retriever": [[7, "bm25retriever"]], "RetrieverReranker": [[8, "retrieverreranker"]], "SemanticRetriever": [[9, "semanticretriever"]], "APINumPyDocExtractor": [[10, "apinumpydocextractor"]], "GalleryExampleExtractor": [[11, "galleryexampleextractor"]], "UserGuideDocExtractor": [[12, "userguidedocextractor"]], "API reference": [[13, "api-reference"]], "Prompt": [[14, "module-ragger_duck.prompt"]], "Retrieval": [[15, "module-ragger_duck.retrieval"]], "Scraping the documentation": [[16, "module-ragger_duck.scraping"]], "User Guide": [[17, "user-guide"]], "What is Retrieval Augmented Generation?": [[17, "what-is-retrieval-augmented-generation"]], "Information retrieval": [[17, "information-retrieval"]], "Concepts": [[17, "concepts"]], "Details regarding the retrievers": [[17, "details-regarding-the-retrievers"]], "References": [[17, "references"]], "Implementation details": [[17, "implementation-details"]], "Retriever": [[18, "retriever"]], "Lexical retrievers": [[18, "lexical-retrievers"]], "Semantical retrievers": [[18, "semantical-retrievers"]], "Reranker: merging lexical and semantical retrievers": [[18, "reranker-merging-lexical-and-semantical-retrievers"]], "Prompting": [[19, "prompting"]], "Prompting for API documentation": [[19, "prompting-for-api-documentation"]], "Text Scraping": [[20, "text-scraping"]], "API documentation": [[20, "api-documentation"]], "User Guide documentation": [[20, "user-guide-documentation"]], "Release history": [[21, "release-history"]], "Version 0.1": [[21, "version-0-1"], [22, "version-0-1"]], "Changelog": [[21, "changelog"], [22, "changelog"]]}, "indexentries": {"module": [[4, "module-ragger_duck.embedding"], [14, "module-ragger_duck.prompt"], [15, "module-ragger_duck.retrieval"], [16, "module-ragger_duck.scraping"]], "ragger_duck.embedding": [[4, "module-ragger_duck.embedding"]], "sentencetransformer (class in ragger_duck.embedding)": [[5, "ragger_duck.embedding.SentenceTransformer"]], "fit() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit"]], "fit_transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.fit_transform"]], "get_metadata_routing() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_metadata_routing"]], "get_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.get_params"]], "set_output() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_output"]], "set_params() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.set_params"]], "transform() (ragger_duck.embedding.sentencetransformer method)": [[5, "ragger_duck.embedding.SentenceTransformer.transform"]], "basicpromptingstrategy (class in ragger_duck.prompt)": [[6, "ragger_duck.prompt.BasicPromptingStrategy"]], "get_metadata_routing() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_metadata_routing"]], "get_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.get_params"]], "set_params() (ragger_duck.prompt.basicpromptingstrategy method)": [[6, "ragger_duck.prompt.BasicPromptingStrategy.set_params"]], "bm25retriever (class in ragger_duck.retrieval)": [[7, "ragger_duck.retrieval.BM25Retriever"]], "fit() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.get_params"]], "query() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.query"]], "set_params() (ragger_duck.retrieval.bm25retriever method)": [[7, "ragger_duck.retrieval.BM25Retriever.set_params"]], "retrieverreranker (class in ragger_duck.retrieval)": [[8, "ragger_duck.retrieval.RetrieverReranker"]], "fit() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.fit"]], "get_metadata_routing() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.get_params"]], "query() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.query"]], "set_params() (ragger_duck.retrieval.retrieverreranker method)": [[8, "ragger_duck.retrieval.RetrieverReranker.set_params"]], "semanticretriever (class in ragger_duck.retrieval)": [[9, "ragger_duck.retrieval.SemanticRetriever"]], "fit() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.fit"]], "get_metadata_routing() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_metadata_routing"]], "get_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.get_params"]], "query() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.query"]], "set_params() (ragger_duck.retrieval.semanticretriever method)": [[9, "ragger_duck.retrieval.SemanticRetriever.set_params"]], "apinumpydocextractor (class in ragger_duck.scraping)": [[10, "ragger_duck.scraping.APINumPyDocExtractor"]], "fit() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.set_params"]], "transform() (ragger_duck.scraping.apinumpydocextractor method)": [[10, "ragger_duck.scraping.APINumPyDocExtractor.transform"]], "galleryexampleextractor (class in ragger_duck.scraping)": [[11, "ragger_duck.scraping.GalleryExampleExtractor"]], "fit() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit"]], "fit_transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.get_params"]], "set_output() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_output"]], "set_params() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.set_params"]], "transform() (ragger_duck.scraping.galleryexampleextractor method)": [[11, "ragger_duck.scraping.GalleryExampleExtractor.transform"]], "userguidedocextractor (class in ragger_duck.scraping)": [[12, "ragger_duck.scraping.UserGuideDocExtractor"]], "fit() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit"]], "fit_transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.fit_transform"]], "get_metadata_routing() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_metadata_routing"]], "get_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.get_params"]], "set_output() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_output"]], "set_params() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.set_params"]], "transform() (ragger_duck.scraping.userguidedocextractor method)": [[12, "ragger_duck.scraping.UserGuideDocExtractor.transform"]], "ragger_duck.prompt": [[14, "module-ragger_duck.prompt"]], "ragger_duck.retrieval": [[15, "module-ragger_duck.retrieval"]], "ragger_duck.scraping": [[16, "module-ragger_duck.scraping"]]}}) \ No newline at end of file