diff --git a/404.html b/404.html index ba4a697a..efb8d1e1 100644 --- a/404.html +++ b/404.html @@ -7,7 +7,7 @@ - +
diff --git a/assets/js/6e787a66.00c6e52b.js b/assets/js/6e787a66.83fcd0a7.js similarity index 82% rename from assets/js/6e787a66.00c6e52b.js rename to assets/js/6e787a66.83fcd0a7.js index a3ea1501..3c16d252 100644 --- a/assets/js/6e787a66.00c6e52b.js +++ b/assets/js/6e787a66.83fcd0a7.js @@ -1 +1 @@ -"use strict";(self.webpackChunkai_rag_lab=self.webpackChunkai_rag_lab||[]).push([[301],{8708:(e,n,t)=>{t.r(n),t.d(n,{assets:()=>s,contentTitle:()=>a,default:()=>h,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=t(4848),d=t(8453);const o={},a="\ud83d\udc50 Generate embeddings",i={id:"prepare-the-data/embed-data",title:"\ud83d\udc50 Generate embeddings",description:"To perform vector search on our data, we need to embed it (i.e. generate embedding vectors) before ingesting it into MongoDB.",source:"@site/docs/50-prepare-the-data/4-embed-data.mdx",sourceDirName:"50-prepare-the-data",slug:"/prepare-the-data/embed-data",permalink:"/ai-rag-lab/docs/prepare-the-data/embed-data",draft:!1,unlisted:!1,editUrl:"https://github.com/mongodb-developer/ai-rag-lab/blob/main/docs/50-prepare-the-data/4-embed-data.mdx",tags:[],version:"current",sidebarPosition:4,frontMatter:{},sidebar:"tutorialSidebar",previous:{title:"\ud83d\udc50 Chunk up the data",permalink:"/ai-rag-lab/docs/prepare-the-data/chunk-data"},next:{title:"\ud83d\udc50 Ingest data into MongoDB",permalink:"/ai-rag-lab/docs/prepare-the-data/ingest-data"}},s={},c=[];function l(e){const n={admonition:"admonition",code:"code",h1:"h1",p:"p",pre:"pre",strong:"strong",...(0,d.R)(),...e.components},{Details:t}=n;return t||function(e,n){throw new Error("Expected "+(n?"component":"object")+" `"+e+"` to be defined: you likely forgot to import, pass, or provide it.")}("Details",!0),(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(n.h1,{id:"-generate-embeddings",children:"\ud83d\udc50 Generate embeddings"}),"\n",(0,r.jsx)(n.p,{children:"To perform vector search on our data, we need to embed it (i.e. generate embedding vectors) before ingesting it into MongoDB."}),"\n",(0,r.jsxs)(n.p,{children:["Fill in any ",(0,r.jsx)(n.code,{children:"embedding = embedding_model.encode(text)
return embedding.tolist()
CODE_BLOCK_10
for doc in tqdm(split_docs):
doc["embedding"] = get_embedding(doc["body"])
embedded_docs.append(doc)
If the embedding generation is taking too long (> 2-3 min), kill/interrupt the cell and move on to the next step with the documents that have been embedded up until that point.
If the embedding generation is taking too long (> 5 min), kill/interrupt the cell and move on to the next step with the documents that have been embedded up until that point.