From 523f9f459338f1edae0dbdfef3ea601b05810d4b Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Thu, 10 Oct 2024 16:35:21 +1300 Subject: [PATCH 1/8] Start http service earlier and add /ready http route --- docker-compose.yml | 29 +++++++++++++---------- src/app.ts | 8 +++---- src/http.ts | 9 +++++++ src/sandbox/webWorker/webWorkerSandbox.ts | 2 +- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 7f57bcf..d6e1339 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,9 @@ services: subql-ai: - image: subquerynetwork/subql-ai-app - # build: - # context: . - # dockerfile: ./Dockerfile + # image: subquerynetwork/subql-ai-app + build: + context: . + dockerfile: ./Dockerfile ports: - 7827:7827 restart: unless-stopped @@ -14,11 +14,11 @@ services: # - -p=/app/index.ts # TODO this doesn't work because dependencies are not copied - -p=ipfs://QmNaNBhXJoFpRJeNQcnTH8Yh6Rf4pzJy6VSnfnQSZHysdZ - -h=http://host.docker.internal:11434 - # healthcheck: - # test: ["CMD", "curl", "-f", "http://subql-ai:7827/health"] - # interval: 3s - # timeout: 5s - # retries: 10 + healthcheck: + test: ["CMD", "curl", "-f", "http://subql-ai:7827/ready"] + interval: 3s + timeout: 5s + retries: 10 # A simple chat UI ui: @@ -26,12 +26,15 @@ services: ports: - 8080:8080 restart: always + depends_on: + "subql-ai": + condition: service_healthy environment: - - 'OPENAI_API_BASE_URLS=http://subql-ai:7827/v1' - - 'OPENAI_API_KEYS=foobar' - - 'WEBUI_AUTH=false' + - "OPENAI_API_BASE_URLS=http://subql-ai:7827/v1" + - "OPENAI_API_KEYS=foobar" + - "WEBUI_AUTH=false" volumes: - - open-webui:/app/backend/data + - open-webui:/app/backend/data volumes: open-webui: diff --git a/src/app.ts b/src/app.ts index 1ac15d3..339e7c8 100644 --- a/src/app.ts +++ b/src/app.ts @@ -31,7 +31,7 @@ export async function runApp(config: { ); const sandbox = await getDefaultSandbox(resolve(projectPath)); - const ctx = await makeContext( + const pendingCtx = makeContext( sandbox, model, (dbPath) => @@ -44,7 +44,7 @@ export async function runApp(config: { ), ); - const runnerHost = new RunnerHost(() => { + const runnerHost = new RunnerHost(async () => { const chatStorage = new MemoryChatStorage(); chatStorage.append([{ role: "system", content: sandbox.systemPrompt }]); @@ -53,7 +53,7 @@ export async function runApp(config: { sandbox, chatStorage, model, - ctx, + await pendingCtx, ); }); @@ -66,7 +66,7 @@ export async function runApp(config: { break; case "http": default: - http(runnerHost, config.port); + http(runnerHost, config.port, pendingCtx); } } diff --git a/src/http.ts b/src/http.ts index bc114da..dee584b 100644 --- a/src/http.ts +++ b/src/http.ts @@ -71,13 +71,22 @@ export type ChatChunkResponse = Static; export function http( runnerHost: RunnerHost, port: number, + onReady?: Promise, ): Deno.HttpServer { const app = new Hono(); + // The ready status should change once the project is fully loaded, including the vector DB + let ready = false; + onReady?.then(() => ready = true); + app.get("/health", (c) => { return c.text("ok"); }); + app.get("/ready", (c) => { + return c.text(ready.toString()); + }); + app.get("/v1/models", (c) => { return c.json({ object: "list", diff --git a/src/sandbox/webWorker/webWorkerSandbox.ts b/src/sandbox/webWorker/webWorkerSandbox.ts index 43dbfda..2883eac 100644 --- a/src/sandbox/webWorker/webWorkerSandbox.ts +++ b/src/sandbox/webWorker/webWorkerSandbox.ts @@ -26,7 +26,7 @@ export class WebWorkerSandbox implements ISandbox { public static async create(path: string): Promise { const w = new Worker( - import.meta.resolve("./webWorker.ts" /*path*/), + import.meta.resolve("./webWorker.ts"), { type: "module", deno: { From da6e8a056484fd06a28913a2b20c1f000c032902 Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Thu, 10 Oct 2024 16:36:25 +1300 Subject: [PATCH 2/8] Make CLI interface wait for ctx --- src/app.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/app.ts b/src/app.ts index 339e7c8..699be3c 100644 --- a/src/app.ts +++ b/src/app.ts @@ -59,6 +59,7 @@ export async function runApp(config: { switch (config.interface) { case "cli": + await pendingCtx; if (sandbox.userMessage) { console.log(sandbox.userMessage); } From 45f796ac34b4d13c01238bc105901c0f9c2caaf6 Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Mon, 14 Oct 2024 16:30:18 +1300 Subject: [PATCH 3/8] Further restrict web worker permissions --- src/app.ts | 4 +-- src/info.ts | 8 +++-- src/loader.ts | 10 +++--- src/sandbox/index.ts | 37 +++++++++++++++++++++-- src/sandbox/webWorker/webWorkerSandbox.ts | 20 +++++++++--- src/util.ts | 3 ++ 6 files changed, 65 insertions(+), 17 deletions(-) diff --git a/src/app.ts b/src/app.ts index 699be3c..aa09524 100644 --- a/src/app.ts +++ b/src/app.ts @@ -23,13 +23,13 @@ export async function runApp(config: { forceReload?: boolean; }): Promise { const model = new Ollama({ host: config.host }); - const projectPath = await loadProject( + const [projectPath, source] = await loadProject( config.projectPath, config.ipfs, undefined, config.forceReload, ); - const sandbox = await getDefaultSandbox(resolve(projectPath)); + const sandbox = await getDefaultSandbox(resolve(projectPath), source); const pendingCtx = makeContext( sandbox, diff --git a/src/info.ts b/src/info.ts index ad017b0..fdc661e 100644 --- a/src/info.ts +++ b/src/info.ts @@ -5,12 +5,14 @@ import type { IProject } from "./project/project.ts"; import type { TSchema } from "@sinclair/typebox"; import type { IPFSClient } from "./ipfs.ts"; import { loadProject } from "./loader.ts"; +import type { ProjectSource } from "./util.ts"; export async function getProjectJson( projectPath: string, + source: ProjectSource, sandboxFactory = getDefaultSandbox, ): Promise & { tools: string[]; config?: TSchema }> { - const sandbox = await sandboxFactory(resolve(projectPath)); + const sandbox = await sandboxFactory(resolve(projectPath), source); return { model: sandbox.model, @@ -26,8 +28,8 @@ export async function projectInfo( ipfs: IPFSClient, json = false, ): Promise { - const loadedPath = await loadProject(projectPath, ipfs); - const projectJson = await getProjectJson(loadedPath); + const [loadedPath, source] = await loadProject(projectPath, ipfs); + const projectJson = await getProjectJson(loadedPath, source); if (json) { console.log(JSON.stringify( diff --git a/src/loader.ts b/src/loader.ts index f251980..35af8dd 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -3,7 +3,7 @@ import { CIDReg, type IPFSClient } from "./ipfs.ts"; import { resolve } from "@std/path/resolve"; import { UntarStream } from "@std/tar"; import { ensureDir, exists } from "@std/fs"; -import { getSpinner } from "./util.ts"; +import { getSpinner, type ProjectSource } from "./util.ts"; export const getOSTempDir = () => Deno.env.get("TMPDIR") || Deno.env.get("TMP") || Deno.env.get("TEMP") || @@ -14,7 +14,7 @@ export async function loadProject( ipfs: IPFSClient, tmpDir?: string, forceReload?: boolean, -): Promise { +): Promise<[string, ProjectSource]> { if (CIDReg.test(projectPath)) { const spinner = getSpinner().start("Loading project from IPFS"); try { @@ -25,7 +25,7 @@ export async function loadProject( // Early exit if the file has already been fetched if (!forceReload && (await exists(filePath))) { spinner.succeed("Loaded project from IPFS"); - return filePath; + return [filePath, "ipfs"]; } await ensureDir(tmp); @@ -36,14 +36,14 @@ export async function loadProject( spinner.succeed("Loaded project from IPFS"); - return filePath; + return [filePath, "ipfs"]; } catch (e) { spinner.fail("Failed to load project"); throw e; } } - return resolve(projectPath); + return [resolve(projectPath), "local"]; } export async function loadVectorStoragePath( diff --git a/src/sandbox/index.ts b/src/sandbox/index.ts index 8d28cee..30b051a 100644 --- a/src/sandbox/index.ts +++ b/src/sandbox/index.ts @@ -1,13 +1,44 @@ +import type { ProjectSource } from "../util.ts"; import type { ISandbox } from "./sandbox.ts"; -import { WebWorkerSandbox } from "./webWorker/webWorkerSandbox.ts"; +import { + type Permissions, + WebWorkerSandbox, +} from "./webWorker/webWorkerSandbox.ts"; export * from "./sandbox.ts"; export * from "./mockSandbox.ts"; export * from "./unsafeSandbox.ts"; -export function getDefaultSandbox(path: string): Promise { +const IPFS_PERMISSIONS: Permissions = { + allowRead: false, + allowFFI: false, +}; + +const LOCAL_PERMISSIONS: Permissions = { + allowRead: true, + allowFFI: true, +}; + +function getPermisionsForSource(source: ProjectSource): Permissions { + switch (source) { + case "local": + return LOCAL_PERMISSIONS; + case "ipfs": + return IPFS_PERMISSIONS; + default: + throw new Error( + `Unable to set permissions for unknown source: ${source}`, + ); + } +} + +export function getDefaultSandbox( + path: string, + source: ProjectSource, +): Promise { // return UnsafeSandbox.create(path); - return WebWorkerSandbox.create(path); + const permissions = getPermisionsForSource(source); + return WebWorkerSandbox.create(path, permissions); } export { WebWorkerSandbox }; diff --git a/src/sandbox/webWorker/webWorkerSandbox.ts b/src/sandbox/webWorker/webWorkerSandbox.ts index 2883eac..90500fe 100644 --- a/src/sandbox/webWorker/webWorkerSandbox.ts +++ b/src/sandbox/webWorker/webWorkerSandbox.ts @@ -18,13 +18,26 @@ import { loadConfigFromEnv } from "../../util.ts"; import { FromSchema } from "../../fromSchema.ts"; import type { IContext } from "../../context/context.ts"; import type { IVectorConfig } from "../../project/project.ts"; +import { dirname } from "@std/path/dirname"; + +export type Permissions = { + /** + * For local projects allow reading all locations for imports to work. + * TODO: This could be limited to the project dir + DENO_DIR cache but DENO_DIR doesn't provide the default currently + */ + allowRead?: boolean; + allowFFI?: boolean; +}; export class WebWorkerSandbox implements ISandbox { #connection: rpc.MessageConnection; #config: TSchema | undefined; #tools: Tool[]; - public static async create(path: string): Promise { + public static async create( + path: string, + permissions?: Permissions, + ): Promise { const w = new Worker( import.meta.resolve("./webWorker.ts"), { @@ -32,10 +45,9 @@ export class WebWorkerSandbox implements ISandbox { deno: { permissions: { env: false, // Should be passed through in loadConfigFromEnv below - // hrtime: false, net: "inherit", // TODO remove localhost - ffi: true, // Needed for node js ffi - read: true, // Needed for imports to node modules + ffi: permissions?.allowFFI ?? false, // Needed for node js ffi, TODO this could be the same as read permissions + read: permissions?.allowRead ? true : [dirname(path)], run: false, write: false, }, diff --git a/src/util.ts b/src/util.ts index 968bde2..cfffe64 100644 --- a/src/util.ts +++ b/src/util.ts @@ -32,3 +32,6 @@ export function getPrompt(): string | null { return response; } + +// Possible sources where projects can be loaded from +export type ProjectSource = "local" | "ipfs"; From 9191ec1793aeefa3fd5587466bc09e624152824e Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Tue, 15 Oct 2024 10:11:52 +1300 Subject: [PATCH 4/8] Pretty typebox errors, add spec version to project --- src/bundle.ts | 6 +++++- src/project/project.ts | 19 ++++++++++++------- src/sandbox/webWorker/webWorker.ts | 12 ++++++++++-- src/util.ts | 21 ++++++++++++++++++++- subquery-delegator/index.ts | 1 + 5 files changed, 48 insertions(+), 11 deletions(-) diff --git a/src/bundle.ts b/src/bundle.ts index 7d59698..1a3e1df 100644 --- a/src/bundle.ts +++ b/src/bundle.ts @@ -20,7 +20,11 @@ export async function publishProject( sandboxFactory = getDefaultSandbox, ): Promise { projectPath = await Deno.realPath(projectPath); - const projectJson = await getProjectJson(projectPath, sandboxFactory); + const projectJson = await getProjectJson( + projectPath, + "local", + sandboxFactory, + ); let code = await generateBundle(projectPath); const vectorDbPath = projectJson.vectorStorage?.path; if (vectorDbPath) { diff --git a/src/project/project.ts b/src/project/project.ts index 6ab37fd..99672f1 100644 --- a/src/project/project.ts +++ b/src/project/project.ts @@ -32,6 +32,11 @@ export const VectorConfig = Type.Object({ }); export const Project = Type.Object({ + // Note: If a new spec version is introduced Type.TemplateLiteral could be used here + specVersion: Type.Literal( + "0.0.1", + { description: "The specification version of the project structure." }, + ), model: Type.String({ description: "The llm model to use", }), @@ -71,8 +76,9 @@ export type IProjectEntrypoint = Static< IProjectEntry >; -export function validateProject(project: unknown): void { - return Value.Assert(Project, project); +export function validateProject(project: unknown): project is IProject { + Value.Assert(Project, project); + return true; } function validateProjectEntry(entry: unknown): entry is IProjectEntry { @@ -109,10 +115,9 @@ export async function getProjectFromEntrypoint( // Check that the constructed project is valid const project = await entrypoint.projectFactory(config); - validateProject(project); - - return project; - } else { - throw new Error("Unable to validate project"); + if (validateProject(project)) { + return project; + } } + throw new Error("Unable to validate project"); } diff --git a/src/sandbox/webWorker/webWorker.ts b/src/sandbox/webWorker/webWorker.ts index 426a070..88f544d 100644 --- a/src/sandbox/webWorker/webWorker.ts +++ b/src/sandbox/webWorker/webWorker.ts @@ -19,6 +19,7 @@ import { type IProjectEntrypoint, } from "../../project/project.ts"; import type { IContext } from "../../context/context.ts"; +import { PrettyTypeboxError } from "../../util.ts"; const conn = rpc.createMessageConnection( new BrowserMessageReader(self), @@ -52,9 +53,16 @@ conn.onRequest(Init, async (config) => { throw new Error("Please call `load` first"); } - project ??= await getProjectFromEntrypoint(entrypoint, config); + try { + project ??= await getProjectFromEntrypoint(entrypoint, config); - return toJsonProject(); + return toJsonProject(); + } catch (e: unknown) { + if (e instanceof Error) { + throw PrettyTypeboxError(e, "Project validation failed"); + } + throw e; + } }); conn.onRequest(GetConfig, () => { diff --git a/src/util.ts b/src/util.ts index cfffe64..befcad7 100644 --- a/src/util.ts +++ b/src/util.ts @@ -1,5 +1,5 @@ import type { Static, TSchema } from "@sinclair/typebox"; -import { Value } from "@sinclair/typebox/value"; +import { AssertError, Value } from "@sinclair/typebox/value"; import ora, { type Ora } from "ora"; import { brightBlue } from "@std/fmt/colors"; @@ -35,3 +35,22 @@ export function getPrompt(): string | null { // Possible sources where projects can be loaded from export type ProjectSource = "local" | "ipfs"; + +export function PrettyTypeboxError( + error: Error, + prefix = "Type Assertion Failed", +): Error { + if ( + error instanceof AssertError || error.constructor.name === "AssertError" + ) { + const errs = [...(error as AssertError).Errors()]; + + let msg = `${prefix}:\n`; + for (const e of errs) { + msg += `\t${e.path}: ${e.message}\n`; + } + return new Error(msg, { cause: error }); + } + + return error; +} diff --git a/subquery-delegator/index.ts b/subquery-delegator/index.ts index 5458108..63633c7 100644 --- a/subquery-delegator/index.ts +++ b/subquery-delegator/index.ts @@ -56,6 +56,7 @@ export const entrypoint: IProjectEntrypoint = { return { tools, + specVersion: "0.0.1", model: "llama3.1", vectorStorage: { type: "lancedb", From bb3e7202b8cee471151bac62113565cef52b0a6b Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Tue, 15 Oct 2024 20:41:05 +1300 Subject: [PATCH 5/8] New project structure --- deno.lock | 7 + src/app.ts | 42 ++--- src/bundle.ts | 61 +++--- src/index.ts | 2 +- src/info.ts | 60 +++--- src/loader.ts | 216 +++++++++++++++------ src/project/project.ts | 217 +++++++++++++--------- src/runner.ts | 2 +- src/sandbox/index.ts | 40 +--- src/sandbox/mockSandbox.ts | 18 +- src/sandbox/sandbox.ts | 22 +-- src/sandbox/unsafeSandbox.ts | 46 ++--- src/sandbox/webWorker/messages.ts | 11 +- src/sandbox/webWorker/webWorker.ts | 25 +-- src/sandbox/webWorker/webWorkerSandbox.ts | 120 ++++++------ src/util.ts | 33 +++- subquery-delegator/index.ts | 35 ++-- 17 files changed, 526 insertions(+), 431 deletions(-) diff --git a/deno.lock b/deno.lock index 02da684..43fe86c 100644 --- a/deno.lock +++ b/deno.lock @@ -40,6 +40,7 @@ "npm:@sinclair/typebox@~0.33.11": "0.33.11", "npm:@types/estree@*": "1.0.5", "npm:@types/mdast@*": "4.0.4", + "npm:@types/node@*": "22.5.4", "npm:@types/yargs@*": "17.0.33", "npm:apache-arrow@17": "17.0.0", "npm:chalk@^5.3.0": "5.3.0", @@ -387,6 +388,12 @@ "undici-types" ] }, + "@types/node@22.5.4": { + "integrity": "sha512-FDuKUJQm/ju9fT/SeX/6+gBzoPzlVCzfzmGkwKvRHQVxi4BntVbyIwf6a4Xn62mrvndLiml6z/UBXIdEVjQLXg==", + "dependencies": [ + "undici-types" + ] + }, "@types/unist@2.0.11": { "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==" }, diff --git a/src/app.ts b/src/app.ts index aa09524..0481c2a 100644 --- a/src/app.ts +++ b/src/app.ts @@ -1,4 +1,3 @@ -import { resolve } from "@std/path/resolve"; import ora from "ora"; import { brightMagenta } from "@std/fmt/colors"; import { Ollama } from "ollama"; @@ -11,7 +10,7 @@ import { Context, type IContext } from "./context/context.ts"; import type { ISandbox } from "./sandbox/sandbox.ts"; import * as lancedb from "@lancedb/lancedb"; import type { IPFSClient } from "./ipfs.ts"; -import { loadProject, loadVectorStoragePath } from "./loader.ts"; +import { Loader } from "./loader.ts"; import { getPrompt } from "./util.ts"; export async function runApp(config: { @@ -23,28 +22,23 @@ export async function runApp(config: { forceReload?: boolean; }): Promise { const model = new Ollama({ host: config.host }); - const [projectPath, source] = await loadProject( + + const loader = new Loader( config.projectPath, config.ipfs, undefined, config.forceReload, ); - const sandbox = await getDefaultSandbox(resolve(projectPath), source); - const pendingCtx = makeContext( + const sandbox = await getDefaultSandbox(loader); + + const ctx = await makeContext( sandbox, model, - (dbPath) => - loadVectorStoragePath( - projectPath, - dbPath, - config.ipfs, - undefined, - config.forceReload, - ), + loader, ); - const runnerHost = new RunnerHost(async () => { + const runnerHost = new RunnerHost(() => { const chatStorage = new MemoryChatStorage(); chatStorage.append([{ role: "system", content: sandbox.systemPrompt }]); @@ -53,39 +47,37 @@ export async function runApp(config: { sandbox, chatStorage, model, - await pendingCtx, + ctx, ); }); switch (config.interface) { case "cli": - await pendingCtx; - if (sandbox.userMessage) { - console.log(sandbox.userMessage); - } await cli(runnerHost); break; case "http": default: - http(runnerHost, config.port, pendingCtx); + http(runnerHost, config.port); } } async function makeContext( sandbox: ISandbox, model: Ollama, - loadVectorStoragePath: (vectorStoragePath: string) => Promise, + loader: Loader, ): Promise { - if (!sandbox.vectorStorage) { + if (!sandbox.manifest.vectorStorage) { return new Context(model); } - const { type, path } = sandbox.vectorStorage; + const { type } = sandbox.manifest.vectorStorage; if (type !== "lancedb") { throw new Error("Only lancedb vector storage is supported"); } - const dbPath = await loadVectorStoragePath(path); - const connection = await lancedb.connect(dbPath); + + const loadRes = await loader.getVectorDb(); + if (!loadRes) throw new Error("Failed to load vector db"); + const connection = await lancedb.connect(loadRes[0]); return new Context(model, connection); } diff --git a/src/bundle.ts b/src/bundle.ts index 1a3e1df..d4d7fcd 100644 --- a/src/bundle.ts +++ b/src/bundle.ts @@ -1,4 +1,3 @@ -import { getProjectJson } from "./info.ts"; import { resolve } from "@std/path/resolve"; import { Tar } from "@std/archive/tar"; import { walk } from "@std/fs/walk"; @@ -9,24 +8,39 @@ import type { IPFSClient } from "./ipfs.ts"; // import * as esbuild from "https://deno.land/x/esbuild@v0.24.0/wasm.js"; // import * as esbuild from "esbuild"; import { denoPlugins } from "@luca/esbuild-deno-loader"; -import { getDefaultSandbox } from "./sandbox/index.ts"; import { toReadableStream } from "@std/io/to-readable-stream"; import { readerFromStreamReader } from "@std/io/reader-from-stream-reader"; import { getSpinner } from "./util.ts"; +import { Loader } from "./loader.ts"; export async function publishProject( projectPath: string, ipfs: IPFSClient, - sandboxFactory = getDefaultSandbox, ): Promise { - projectPath = await Deno.realPath(projectPath); - const projectJson = await getProjectJson( - projectPath, - "local", - sandboxFactory, - ); - let code = await generateBundle(projectPath); - const vectorDbPath = projectJson.vectorStorage?.path; + const loader = new Loader(projectPath, ipfs); + + const [_, manifest, source] = await loader.getManifest(); + if (source !== "local") { + throw new Error("Cannot bundle a project that isn't local"); + } + + // Upload project + const [project, projectSource] = await loader.getProject(); + if (projectSource === "local") { + const spinner = getSpinner().start("Publishing project code"); + try { + const code = await generateBundle(project); + const [{ cid: codeCid }] = await ipfs.addFile([code]); + manifest.entry = `ipfs://${codeCid}`; + spinner.succeed("Published project code"); + } catch (e) { + spinner.fail("Failed to publish project code"); + throw e; + } + } + + // Upload vector db + const vectorDbPath = manifest.vectorStorage?.path; if (vectorDbPath) { // Resolve the db path relative to the project const dbPath = resolve(dirname(projectPath), vectorDbPath); @@ -43,11 +57,9 @@ export async function publishProject( const [{ cid }] = await ipfs.addFile([dbBuf]); - code = updateProjectVectorDbPath( - code, - vectorDbPath, - `ipfs://${cid}`, - ); + // Update manifest + manifest.vectorStorage!.path = `ipfs://${cid}`; + spinner.succeed("Published vector db"); } catch (e) { spinner.fail("Failed to publish project vectordb"); @@ -56,8 +68,9 @@ export async function publishProject( } } + // Upload manifest const spinner = getSpinner().start("Publishing project to IPFS"); - const [{ cid }] = await ipfs.addFile([code]); + const [{ cid }] = await ipfs.addFile([JSON.stringify(manifest, null, 2)]); spinner.succeed("Published project to IPFS"); return `ipfs://${cid}`; } @@ -90,20 +103,6 @@ export async function generateBundle(projectPath: string): Promise { } } -/** - * @param code The raw bundled code - * @param currentPath The current db path that will get replaced - * @param newPath The new db path to replace - * @returns Updated raw bundled code - */ -function updateProjectVectorDbPath( - code: string, - currentPath: string, - newPath: string, -): string { - return code.replaceAll(currentPath, newPath); -} - /** * Archives the lancedb directory into a file for uploading * @param dbPath The path to the lancedb directory diff --git a/src/index.ts b/src/index.ts index 8987793..14da932 100755 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -#!/usr/bin/env -S deno run --allow-env --allow-net --allow-sys --allow-read --allow-write --allow-ffi --allow-run --unstable-worker-options --no-prompt +#!/usr/bin/env -S deno run --allow-env --allow-net --allow-sys --allow-read --allow-write --allow-ffi --allow-run --unstable-worker-options // TODO limit --allow-ffi to just lancedb // TODO limit --deny-net on localhost except ollama/db // TODO limit --allow-run needed for Deno.exit diff --git a/src/info.ts b/src/info.ts index fdc661e..55ebe2a 100644 --- a/src/info.ts +++ b/src/info.ts @@ -1,26 +1,31 @@ -import { resolve } from "@std/path/resolve"; import { brightBlue, brightMagenta } from "@std/fmt/colors"; import { getDefaultSandbox } from "./sandbox/index.ts"; -import type { IProject } from "./project/project.ts"; -import type { TSchema } from "@sinclair/typebox"; +import type { ProjectManifest } from "./project/project.ts"; import type { IPFSClient } from "./ipfs.ts"; -import { loadProject } from "./loader.ts"; -import type { ProjectSource } from "./util.ts"; +import { Loader } from "./loader.ts"; + +type StaticProject = ProjectManifest & { + tools?: string[]; + systemPrompt?: string; +}; export async function getProjectJson( - projectPath: string, - source: ProjectSource, + manifest: ProjectManifest, + loader: Loader, sandboxFactory = getDefaultSandbox, -): Promise & { tools: string[]; config?: TSchema }> { - const sandbox = await sandboxFactory(resolve(projectPath), source); +): Promise { + try { + const sandbox = await sandboxFactory(loader); - return { - model: sandbox.model, - config: sandbox.config, - tools: (await sandbox.getTools()).map((t) => t.function.name), - systemPrompt: sandbox.systemPrompt, - vectorStorage: sandbox.vectorStorage, - }; + return { + ...sandbox.manifest, + tools: (await sandbox.getTools()).map((t) => t.function.name), + systemPrompt: sandbox.systemPrompt, + }; + } catch (e) { + console.warn(`Failed to load project: ${e}`); + return manifest; + } } export async function projectInfo( @@ -28,12 +33,13 @@ export async function projectInfo( ipfs: IPFSClient, json = false, ): Promise { - const [loadedPath, source] = await loadProject(projectPath, ipfs); - const projectJson = await getProjectJson(loadedPath, source); + const loader = new Loader(projectPath, ipfs); + const [_, manifest] = await loader.getManifest(); + const staticProject = await getProjectJson(manifest, loader); if (json) { console.log(JSON.stringify( - projectJson, + staticProject, null, 2, )); @@ -41,16 +47,20 @@ export async function projectInfo( } const info: [string, string][] = [ - ["Model", projectJson.model], - ["Conifg", JSON.stringify(projectJson.config, null, 2)], - ["Tools", projectJson.tools.join("\n")], - ["System Prompt", projectJson.systemPrompt], + ["Model", staticProject.model], + ["Conifg", JSON.stringify(staticProject.config, null, 2)], + ["Tools", staticProject.tools?.join("\n") ?? "No Tools found"], + ["System Prompt", staticProject?.systemPrompt ?? "No System Prompt found"], ]; - if (projectJson.vectorStorage) { + if (manifest.endpoints?.length) { + info.push(["Endpoints", manifest.endpoints.join("\n")]); + } + + if (staticProject.vectorStorage) { info.push([ "Vector Storage", - `Type: ${projectJson.vectorStorage.type}\nPath: ${projectJson.vectorStorage.path}`, + `Type: ${staticProject.vectorStorage.type}\nPath: ${staticProject.vectorStorage.path}`, ]); } diff --git a/src/loader.ts b/src/loader.ts index 35af8dd..30a07d1 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -3,71 +3,72 @@ import { CIDReg, type IPFSClient } from "./ipfs.ts"; import { resolve } from "@std/path/resolve"; import { UntarStream } from "@std/tar"; import { ensureDir, exists } from "@std/fs"; -import { getSpinner, type ProjectSource } from "./util.ts"; +import { getSpinner, type Source, SpinnerLog } from "./util.ts"; +import { ProjectManifest } from "./project/project.ts"; +import { Value } from "@sinclair/typebox/value"; export const getOSTempDir = () => Deno.env.get("TMPDIR") || Deno.env.get("TMP") || Deno.env.get("TEMP") || "/tmp"; -export async function loadProject( - projectPath: string, - ipfs: IPFSClient, - tmpDir?: string, - forceReload?: boolean, -): Promise<[string, ProjectSource]> { - if (CIDReg.test(projectPath)) { - const spinner = getSpinner().start("Loading project from IPFS"); - try { - const cid = projectPath.replace("ipfs://", ""); - - const tmp = resolve(tmpDir ?? getOSTempDir(), cid); - const filePath = resolve(tmp, "index.ts"); - // Early exit if the file has already been fetched - if (!forceReload && (await exists(filePath))) { - spinner.succeed("Loaded project from IPFS"); - return [filePath, "ipfs"]; - } - await ensureDir(tmp); - - const file = await Deno.open(filePath, { create: true, write: true }); +async function loadJson(path: string): Promise { + const decoder = new TextDecoder(); + const data = await Deno.readFile(path); + const raw = decoder.decode(data); - const readable = await ipfs.catStream(cid); - await readable.pipeTo(file.writable); + return JSON.parse(raw); +} - spinner.succeed("Loaded project from IPFS"); +async function loadScript(path: string): Promise { + const { default: raw } = await import(path); + return raw; +} - return [filePath, "ipfs"]; - } catch (e) { - spinner.fail("Failed to load project"); - throw e; - } +/** + * Loads a local manifest file (either json, ts or js) + */ +export async function loadManfiest(path: string): Promise { + let manifest: unknown; + try { + manifest = await loadJson(path); + } catch (_e: unknown) { + manifest = await loadScript(path); } - return [resolve(projectPath), "local"]; + Value.Assert(ProjectManifest, manifest); + + return manifest; } -export async function loadVectorStoragePath( - projectPath: string, - vectorStoragePath: string, +// TODO support tar +/** + * @param path The content path or cid + * @param ipfs The IPFS client to fetch content if from IPFS + * @param tmpDir (optional) The location to cache content, defaults to the OS temp directory + * @param force (optional) If true and the content is from IPFS it will check if its already been fetched + * @param workingPath (optional) If the content is local it will resolve the path relative to this + * @returns + */ +export async function pullContent( + path: string, ipfs: IPFSClient, + fileName: string, tmpDir?: string, - forceReload?: boolean, -): Promise { - if (CIDReg.test(vectorStoragePath)) { - const spinner = getSpinner().start("Loading vector db from IPFS"); - try { - const cid = vectorStoragePath.replace("ipfs://", ""); - const tmp = resolve(tmpDir ?? getOSTempDir(), cid); + force?: boolean, + workingPath?: string, +): Promise<[string, Source]> { + if (CIDReg.test(path)) { + const cid = path.replace("ipfs://", ""); + const tmp = resolve(tmpDir ?? getOSTempDir(), cid); + await ensureDir(tmp); + + if (fileName.endsWith(".gz")) { + const readStream = await ipfs.catStream(cid); - // Early exit if the file has already been fetched - if (!forceReload && (await exists(tmp))) { - spinner.succeed("Loaded vector db from IPFS"); - return tmp; + if (!force && (await exists(tmp))) { + return [tmp, "ipfs"]; } - await ensureDir(tmp); - const readStream = await ipfs.catStream(cid); - for await ( const entry of readStream.pipeThrough(new DecompressionStream("gzip")) .pipeThrough(new UntarStream()) @@ -76,24 +77,123 @@ export async function loadVectorStoragePath( await ensureDir(dirname(path)); await entry.readable?.pipeTo((await Deno.create(path)).writable); } + return [tmp, "ipfs"]; + } else { + const filePath = resolve(tmp, fileName); + // Early exit if the file has already been fetched + if (!force && (await exists(filePath))) { + return [filePath, "ipfs"]; + } + + const file = await Deno.open(filePath, { create: true, write: true }); + + const readable = await ipfs.catStream(cid); + await readable.pipeTo(file.writable); - spinner.succeed("Loaded vector db from IPFS"); - return tmp; - } catch (e) { - spinner.fail("Failed to load vector db"); - throw e; + return [filePath, "ipfs"]; } } try { - const uri = new URL(vectorStoragePath); + // This should throw if the project is not a valid URL. This allows loading lancedb from gcs/s3 + new URL(path); - if (uri.protocol) { - return vectorStoragePath; - } + return [path, "remote"]; } catch (_e) { // DO nothing } - return resolve(dirname(projectPath), vectorStoragePath); + return [resolve(workingPath ?? "", path), "local"]; +} + +export class Loader { + #ipfs: IPFSClient; + #force: boolean; + + #manifest?: [manifestPath: string, ProjectManifest, Source]; + + constructor( + readonly projectPath: string, + ipfs: IPFSClient, + readonly tmpDir?: string, + force?: boolean, + ) { + this.#ipfs = ipfs; + this.#force = force ?? false; + } + + private async pullContent( + path: string, + fileName: string, + tmpDir = this.tmpDir, + workingPath?: string, + ): Promise<[string, Source]> { + return await pullContent( + path, + this.#ipfs, + fileName, + tmpDir, + this.#force, + workingPath, + ); + } + + // @SpinnerLog({ start: "Loading project manifest", success: "Loaded project manifest", fail: "Failed to load project manfiest"}) + async getManifest(): Promise<[string, ProjectManifest, Source]> { + if (!this.#manifest) { + const spinner = getSpinner().start("Loading project manifest"); + try { + const [manifestPath, source] = await this.pullContent( + this.projectPath, + "manifest.json", + ); + + const manifest = await loadManfiest(manifestPath); + + this.#manifest = [manifestPath, manifest, source]; + spinner.succeed("Loaded project manifest"); + } catch (e) { + spinner.fail("Failed to load project manifest"); + throw e; + } + } + return this.#manifest; + } + + @SpinnerLog({ + start: "Loading project", + success: "Loaded project", + fail: "Failed to load project", + }) + async getProject(): Promise<[string, Source]> { + const [manifestPath, manifest, manifestSource] = await this.getManifest(); + const [projectPath, source] = await this.pullContent( + manifest.entry, + "project.ts", + dirname(manifestPath), + manifestSource == "local" ? dirname(this.projectPath) : undefined, + ); + return [projectPath, source]; + } + + @SpinnerLog({ + start: "Loading vector db", + success: "Loaded vector db", + fail: "Failed to load vector db", + }) + async getVectorDb(): Promise<[string, Source] | undefined> { + const [manifestPath, manifest, manifestSource] = await this.getManifest(); + if (!manifest.vectorStorage?.path) { + return undefined; + } + + // TODO resovle local paths + const res = await this.pullContent( + manifest.vectorStorage.path, + "db.gz", + dirname(manifestPath), + manifestSource == "local" ? dirname(this.projectPath) : undefined, + ); + return res; + } } diff --git a/src/project/project.ts b/src/project/project.ts index 99672f1..fd40d0c 100644 --- a/src/project/project.ts +++ b/src/project/project.ts @@ -1,16 +1,7 @@ -import { - type Static, - type TFunction, - type TObject, - type TPromise, - type TSchema, - type TUndefined, - type TUnion, - Type, -} from "@sinclair/typebox"; +import { type Static, Type } from "@sinclair/typebox"; import { Value } from "@sinclair/typebox/value"; -import { loadConfigFromEnv } from "../util.ts"; import { ContextType } from "../context/context.ts"; +import { loadRawConfigFromEnv } from "../util.ts"; // TODO link this to the types defined in tool export const FunctionToolType = Type.Object({ @@ -31,93 +22,135 @@ export const VectorConfig = Type.Object({ type: Type.Literal("lancedb"), }); -export const Project = Type.Object({ - // Note: If a new spec version is introduced Type.TemplateLiteral could be used here - specVersion: Type.Literal( - "0.0.1", - { description: "The specification version of the project structure." }, - ), - model: Type.String({ - description: "The llm model to use", - }), - embedModel: Type.Optional(Type.String({ - description: "The model used to generate embeddings queries", - })), - systemPrompt: Type.String({ - description: "The initial system prompt of the app", - }), - userMessage: Type.Optional(Type.String({ - description: "An initial message to present to the user", +export const ProjectManifest = Type.Object({ + specVersion: Type.Literal("0.0.1"), + model: Type.String(), + entry: Type.String(), + vectorStorage: Type.Optional(Type.Object({ + type: Type.String(), + path: Type.String(), })), + endpoints: Type.Optional(Type.Array(Type.String())), + config: Type.Optional(Type.Any()), // TODO how can this be a JSON Schema type? +}); + +export const Project = Type.Object({ tools: Type.Array(FunctionToolType), - vectorStorage: Type.Optional(VectorConfig), + systemPrompt: Type.String(), }); -export type IFunctionTool = Static; -export type IVectorConfig = Static; - -type IProjectEntry = TUnion<[ - TObject< - { - configType: TSchema; - projectFactory: TFunction<[Config], TPromise>; - } - >, - TObject< - { - configType: TUndefined; - projectFactory: TFunction<[], TPromise>; - } - >, -]>; - -export type IProject = Static; -export type IProjectEntrypoint = Static< - IProjectEntry ->; - -export function validateProject(project: unknown): project is IProject { +export const ProjectEntry = Type.Function( + [Type.Any()], + Type.Union([Project, Type.Promise(Project)]), +); + +export type ProjectManifest = Static; +export type Project = Static; +export type ProjectEntry = Static; + +export async function loadProject( + manifest: ProjectManifest, + entry: ProjectEntry, + config?: Record, +): Promise { + const cfg = loadRawConfigFromEnv(manifest.config, config); + + const project = await entry(cfg); Value.Assert(Project, project); - return true; + + return project; } -function validateProjectEntry(entry: unknown): entry is IProjectEntry { - // deno-lint-ignore no-explicit-any - const projectType = ProjectEntrypointGen((entry as any)?.configType); +// export const Project = Type.Object({ +// // Note: If a new spec version is introduced Type.TemplateLiteral could be used here +// specVersion: Type.Literal( +// "0.0.1", +// { description: "The specification version of the project structure." }, +// ), +// model: Type.String({ +// description: "The llm model to use", +// }), +// endpoints: Type.Optional(Type.Array(Type.String({ +// description: 'Endpoints the project would like to access', +// }))), +// embedModel: Type.Optional(Type.String({ +// description: "The model used to generate embeddings queries", +// })), +// systemPrompt: Type.String({ +// description: "The initial system prompt of the app", +// }), +// userMessage: Type.Optional(Type.String({ +// description: "An initial message to present to the user", +// })), +// tools: Type.Array(FunctionToolType), +// vectorStorage: Type.Optional(VectorConfig), +// }); - Value.Assert(projectType, entry); - return true; -} +// export type IFunctionTool = Static; +// export type IVectorConfig = Static; -const ProjectEntrypointGen = (t: T) => - Type.Union([ - Type.Object({ - configType: Type.Any(), - projectFactory: Type.Function([t], Type.Promise(Project)), - }), - Type.Object({ - // configType: Type.Undefined(), - projectFactory: Type.Function([], Type.Promise(Project)), - }), - ]); - -export async function getProjectFromEntrypoint( - entrypoint: unknown, - providedConfig?: Record, -): Promise { - if (!entrypoint) { - throw new Error("Project entry is invalid"); - } - // Validate the entrypoint - if (validateProjectEntry(entrypoint)) { - const config = loadConfigFromEnv(entrypoint.configType, providedConfig); - - // Check that the constructed project is valid - const project = await entrypoint.projectFactory(config); - - if (validateProject(project)) { - return project; - } - } - throw new Error("Unable to validate project"); -} +// type IProjectEntry = TUnion<[ +// TObject< +// { +// configType: TSchema; +// projectFactory: TFunction<[Config], TPromise>; +// } +// >, +// TObject< +// { +// configType: TUndefined; +// projectFactory: TFunction<[], TPromise>; +// } +// >, +// ]>; + +// export type IProject = Static; +// export type IProjectEntrypoint = Static< +// IProjectEntry +// >; + +// export function validateProject(project: unknown): project is IProject { +// Value.Assert(Project, project); +// return true; +// } + +// function validateProjectEntry(entry: unknown): entry is IProjectEntry { +// // deno-lint-ignore no-explicit-any +// const projectType = ProjectEntrypointGen((entry as any)?.configType); + +// Value.Assert(projectType, entry); +// return true; +// } + +// const ProjectEntrypointGen = (t: T) => +// Type.Union([ +// Type.Object({ +// configType: Type.Any(), +// projectFactory: Type.Function([t], Type.Promise(Project)), +// }), +// Type.Object({ +// // configType: Type.Undefined(), +// projectFactory: Type.Function([], Type.Promise(Project)), +// }), +// ]); + +// export async function getProjectFromEntrypoint( +// entrypoint: unknown, +// providedConfig?: Record, +// ): Promise { +// if (!entrypoint) { +// throw new Error("Project entry is invalid"); +// } +// // Validate the entrypoint +// if (validateProjectEntry(entrypoint)) { +// const config = loadConfigFromEnv(entrypoint.configType, providedConfig); + +// // Check that the constructed project is valid +// const project = await entrypoint.projectFactory(config); + +// if (validateProject(project)) { +// return project; +// } +// } +// throw new Error("Unable to validate project"); +// } diff --git a/src/runner.ts b/src/runner.ts index 13fc6f1..ea06ecc 100644 --- a/src/runner.ts +++ b/src/runner.ts @@ -19,7 +19,7 @@ export class Runner { private async runChat(messages: Message[]): Promise { const res = await this.#ollama.chat({ - model: this.sandbox.model, + model: this.sandbox.manifest.model, stream: false, tools: await this.sandbox.getTools(), // TODO should there be a limit to the number of items in the chat history? diff --git a/src/sandbox/index.ts b/src/sandbox/index.ts index 30b051a..d194cd2 100644 --- a/src/sandbox/index.ts +++ b/src/sandbox/index.ts @@ -1,44 +1,18 @@ -import type { ProjectSource } from "../util.ts"; +import type { Loader } from "../loader.ts"; import type { ISandbox } from "./sandbox.ts"; -import { - type Permissions, - WebWorkerSandbox, -} from "./webWorker/webWorkerSandbox.ts"; +// import { UnsafeSandbox } from "./unsafeSandbox.ts"; +import { WebWorkerSandbox } from "./webWorker/webWorkerSandbox.ts"; export * from "./sandbox.ts"; export * from "./mockSandbox.ts"; export * from "./unsafeSandbox.ts"; -const IPFS_PERMISSIONS: Permissions = { - allowRead: false, - allowFFI: false, -}; - -const LOCAL_PERMISSIONS: Permissions = { - allowRead: true, - allowFFI: true, -}; - -function getPermisionsForSource(source: ProjectSource): Permissions { - switch (source) { - case "local": - return LOCAL_PERMISSIONS; - case "ipfs": - return IPFS_PERMISSIONS; - default: - throw new Error( - `Unable to set permissions for unknown source: ${source}`, - ); - } -} - export function getDefaultSandbox( - path: string, - source: ProjectSource, + loader: Loader, ): Promise { - // return UnsafeSandbox.create(path); - const permissions = getPermisionsForSource(source); - return WebWorkerSandbox.create(path, permissions); + // return UnsafeSandbox.create(loader); + + return WebWorkerSandbox.create(loader); } export { WebWorkerSandbox }; diff --git a/src/sandbox/mockSandbox.ts b/src/sandbox/mockSandbox.ts index 6207e0e..9efe8a4 100644 --- a/src/sandbox/mockSandbox.ts +++ b/src/sandbox/mockSandbox.ts @@ -1,24 +1,24 @@ -import type { TSchema } from "@sinclair/typebox"; import type { IContext } from "../context/context.ts"; -import type { ITool } from "../tool.ts"; import type { ISandbox } from "./sandbox.ts"; +import type { Project, ProjectManifest } from "../project/project.ts"; export class MockSandbox implements ISandbox { constructor( - readonly model: string, - readonly systemPrompt: string, - private tools: ITool[], - readonly userMessage?: string, - readonly config?: TSchema, + readonly manifest: ProjectManifest, + readonly project: Project, ) {} + get systemPrompt(): string { + return this.project.systemPrompt; + } + // deno-lint-ignore require-await async getTools() { - return this.tools.map((t) => t.toTool()); + return this.project.tools.map((t) => t.toTool()); } runTool(toolName: string, args: unknown, ctx: IContext): Promise { - const tool = this.tools.find((t) => t.name === toolName); + const tool = this.project.tools.find((t) => t.name === toolName); if (!tool) { throw new Error(`Tool not found: ${toolName}`); diff --git a/src/sandbox/sandbox.ts b/src/sandbox/sandbox.ts index d7b977f..850ca69 100644 --- a/src/sandbox/sandbox.ts +++ b/src/sandbox/sandbox.ts @@ -1,34 +1,16 @@ import type { Tool } from "ollama"; -import type { TSchema } from "@sinclair/typebox"; import type { IContext } from "../context/context.ts"; -import type { IVectorConfig } from "../project/project.ts"; +import type { ProjectManifest } from "../project/project.ts"; /** * The sandbox provides a defined interface to run untrusted code */ export interface ISandbox { - /** - * The ollama model that will be used for the project - */ - model: string; + manifest: ProjectManifest; - /** - * The initial system prompt, this sets the global context - */ systemPrompt: string; - /** - * An optional message that can be presented to users at the start of a chat - */ - userMessage?: string; - - vectorStorage?: IVectorConfig; - - config?: TSchema; - getTools(): Promise; runTool(toolName: string, args: unknown, ctx: IContext): Promise; - - // TODO expand this interface with more untrusted data/functions. e.g RAG } diff --git a/src/sandbox/unsafeSandbox.ts b/src/sandbox/unsafeSandbox.ts index 6005b05..62cd58c 100644 --- a/src/sandbox/unsafeSandbox.ts +++ b/src/sandbox/unsafeSandbox.ts @@ -1,60 +1,48 @@ import type { Tool } from "ollama"; import { - getProjectFromEntrypoint, - type IProject, - type IVectorConfig, - validateProject, + loadProject, + type Project, + type ProjectManifest, } from "../project/project.ts"; import type { ISandbox } from "./sandbox.ts"; import type { IContext } from "../context/context.ts"; +import type { Loader } from "../loader.ts"; /** * This class is considered unsafe as users code is directly required */ export class UnsafeSandbox implements ISandbox { - #project: IProject; + #project: Project; - public static async create(path: string): Promise { - const p = await import(path); + public static async create(loader: Loader): Promise { + const [_, manifest] = await loader.getManifest(); + const [projectPath] = await loader.getProject(); - const project = await getProjectFromEntrypoint(p.entrypoint); + const { default: entry } = await import(projectPath); - return new UnsafeSandbox(project); - } - - private constructor(project: IProject) { - this.#project = project; + const project = await loadProject(manifest, entry); - validateProject(this.#project); + return new UnsafeSandbox(manifest, project); } - get model(): string { - return this.#project.model; + private constructor( + readonly manifest: ProjectManifest, + readonly project: Project, + ) { + this.#project = project; } - // get config(): TSchema { - // return this.#project.config; - // } - get systemPrompt(): string { return this.#project.systemPrompt; } - get userMessage(): string | undefined { - return this.#project.userMessage; - } - - get vectorStorage(): IVectorConfig | undefined { - return this.#project.vectorStorage; - } - // deno-lint-ignore require-await async getTools(): Promise { return this.#project.tools.map((t) => t.toTool()); } runTool(toolName: string, args: unknown, ctx: IContext): Promise { - const tool = this.#project.tools.find((t) => t.name === toolName); + const tool = this.project.tools.find((t) => t.name === toolName); if (!tool) { throw new Error(`Tool not found: ${toolName}`); diff --git a/src/sandbox/webWorker/messages.ts b/src/sandbox/webWorker/messages.ts index 5cb8bc2..f09e2e7 100644 --- a/src/sandbox/webWorker/messages.ts +++ b/src/sandbox/webWorker/messages.ts @@ -1,20 +1,17 @@ import type { Tool } from "ollama"; -import type { TSchema } from "@sinclair/typebox"; import * as rpc from "vscode-jsonrpc"; -import type { IProject } from "../../project/project.ts"; +import type { ProjectManifest } from "../../project/project.ts"; -export type IProjectJson = Omit & { tools: Tool[] }; +export type IProjectJson = { tools: Tool[]; systemPrompt: string }; // Framework -> Sandbox export const Load = new rpc.RequestType("load"); -export const Init = new rpc.RequestType< +export const Init = new rpc.RequestType2< + ProjectManifest, Record, IProjectJson, string >("init"); -export const GetConfig = new rpc.RequestType0( - "get_config", -); export const CallTool = new rpc.RequestType2( "call_tool", ); diff --git a/src/sandbox/webWorker/webWorker.ts b/src/sandbox/webWorker/webWorker.ts index 88f544d..fbdd3a8 100644 --- a/src/sandbox/webWorker/webWorker.ts +++ b/src/sandbox/webWorker/webWorker.ts @@ -7,27 +7,23 @@ import { CallTool, CtxComputeQueryEmbedding, CtxVectorSearch, - GetConfig, Init, type IProjectJson, Load, } from "./messages.ts"; -import { - getProjectFromEntrypoint, - type IProject, - type IProjectEntrypoint, -} from "../../project/project.ts"; +import type { Project, ProjectEntry } from "../../project/project.ts"; import type { IContext } from "../../context/context.ts"; import { PrettyTypeboxError } from "../../util.ts"; +import { loadProject } from "../../project/project.ts"; const conn = rpc.createMessageConnection( new BrowserMessageReader(self), new BrowserMessageWriter(self), ); -let entrypoint: IProjectEntrypoint; -let project: IProject; +let entrypoint: ProjectEntry; +let project: Project; const context = { vectorSearch: (table, vectors) => @@ -45,16 +41,16 @@ function toJsonProject(): IProjectJson { } conn.onRequest(Load, async (path) => { - entrypoint ??= (await import(path)).entrypoint; + entrypoint ??= (await import(path)).default; }); -conn.onRequest(Init, async (config) => { +conn.onRequest(Init, async (manifest, config) => { if (!entrypoint) { throw new Error("Please call `load` first"); } try { - project ??= await getProjectFromEntrypoint(entrypoint, config); + project ??= await loadProject(manifest, entrypoint, config); return toJsonProject(); } catch (e: unknown) { @@ -65,13 +61,6 @@ conn.onRequest(Init, async (config) => { } }); -conn.onRequest(GetConfig, () => { - if (!entrypoint) { - throw new Error("Project is not initialized"); - } - return entrypoint.configType; -}); - conn.onRequest(CallTool, (toolName, args) => { if (!project) { throw new Error("Project is not initialized"); diff --git a/src/sandbox/webWorker/webWorkerSandbox.ts b/src/sandbox/webWorker/webWorkerSandbox.ts index 90500fe..0f02f9c 100644 --- a/src/sandbox/webWorker/webWorkerSandbox.ts +++ b/src/sandbox/webWorker/webWorkerSandbox.ts @@ -1,5 +1,4 @@ import type { Tool } from "ollama"; -import type { TSchema } from "@sinclair/typebox"; import * as rpc from "vscode-jsonrpc"; import { BrowserMessageReader, @@ -10,14 +9,13 @@ import { CallTool, CtxComputeQueryEmbedding, CtxVectorSearch, - GetConfig, Init, Load, } from "./messages.ts"; -import { loadConfigFromEnv } from "../../util.ts"; -import { FromSchema } from "../../fromSchema.ts"; +import { loadRawConfigFromEnv, type Source } from "../../util.ts"; import type { IContext } from "../../context/context.ts"; -import type { IVectorConfig } from "../../project/project.ts"; +import type { ProjectManifest } from "../../project/project.ts"; +import type { Loader } from "../../loader.ts"; import { dirname } from "@std/path/dirname"; export type Permissions = { @@ -29,25 +27,53 @@ export type Permissions = { allowFFI?: boolean; }; +const IPFS_PERMISSIONS = (dir?: string): Deno.PermissionOptionsObject => ({ + read: dir ? [dirname(dir)] : false, // Allow the cache dir + ffi: false, +}); + +const LOCAL_PERMISSIONS: Deno.PermissionOptionsObject = { + read: true, + ffi: true, +}; + +function getPermisionsForSource( + source: Source, + projectDir: string, +): Deno.PermissionOptionsObject { + switch (source) { + case "local": + return LOCAL_PERMISSIONS; + case "ipfs": + return IPFS_PERMISSIONS(projectDir); + default: + throw new Error( + `Unable to set permissions for unknown source: ${source}`, + ); + } +} + export class WebWorkerSandbox implements ISandbox { #connection: rpc.MessageConnection; - #config: TSchema | undefined; + #tools: Tool[]; public static async create( - path: string, - permissions?: Permissions, + loader: Loader, ): Promise { + const [manifestPath, manifest, source] = await loader.getManifest(); + + const permissions = getPermisionsForSource(source, manifestPath); + const w = new Worker( import.meta.resolve("./webWorker.ts"), { type: "module", deno: { permissions: { - env: false, // Should be passed through in loadConfigFromEnv below - net: "inherit", // TODO remove localhost - ffi: permissions?.allowFFI ?? false, // Needed for node js ffi, TODO this could be the same as read permissions - read: permissions?.allowRead ? true : [dirname(path)], + ...permissions, + env: true, // Should be passed through in loadConfigFromEnv below + net: manifest.endpoints, // TODO add config endpoints run: false, write: false, }, @@ -62,45 +88,33 @@ export class WebWorkerSandbox implements ISandbox { ); conn.listen(); - await conn.sendRequest(Load, path); - const rawConfigType = await conn.sendRequest(GetConfig); + const [entryPath] = await loader.getProject(); + await conn.sendRequest(Load, entryPath); - // Need to restore the config and make it compatible as it uses symbols internally - const configType = rawConfigType - // @ts-ignore functionally works but types are too complex - ? FromSchema(JSON.parse(JSON.stringify(rawConfigType))) - : undefined; - const config = loadConfigFromEnv(configType); - const project = await conn.sendRequest(Init, config); + const config = loadRawConfigFromEnv(manifest.config); + const { tools, systemPrompt } = await conn.sendRequest( + Init, + manifest, + config, + ); return new WebWorkerSandbox( conn, - configType, - project.model, - project.systemPrompt, - project.tools, - project.userMessage, - project.vectorStorage, + manifest, + systemPrompt, + tools, ); } private constructor( connection: rpc.MessageConnection, - config: TSchema | undefined, - readonly model: string, + readonly manifest: ProjectManifest, readonly systemPrompt: string, tools: Tool[], - readonly userMessage?: string, - readonly vectorStorage?: IVectorConfig, ) { - this.#connection = connection; this.#tools = tools; - this.#config = config; - } - - get config(): TSchema | undefined { - return this.#config; + this.#connection = connection; } // deno-lint-ignore require-await @@ -108,22 +122,22 @@ export class WebWorkerSandbox implements ISandbox { return this.#tools; } - #hasSetupCxt = false; - private setupCtxMethods(ctx: IContext) { - if (this.#hasSetupCxt) return; - // Connect up context so sandbox can call application - this.#connection.onRequest(CtxVectorSearch, async (tableName, vector) => { - const res = await ctx.vectorSearch(tableName, vector); - - // lancedb returns classes (Apache Arrow - Struct Row). It needs to be made serializable - // This is done here as its specific to the webworker sandbox - return res.map((r) => JSON.parse(JSON.stringify(r))); - }); - this.#connection.onRequest(CtxComputeQueryEmbedding, async (query) => { - return await ctx.computeQueryEmbedding(query); - }); - this.#hasSetupCxt = true; - } + // #hasSetupCxt = false; + // private setupCtxMethods(ctx: IContext) { + // if (this.#hasSetupCxt) return; + // // Connect up context so sandbox can call application + // this.#connection.onRequest(CtxVectorSearch, async (tableName, vector) => { + // const res = await ctx.vectorSearch(tableName, vector); + + // // lancedb returns classes (Apache Arrow - Struct Row). It needs to be made serializable + // // This is done here as its specific to the webworker sandbox + // return res.map((r) => JSON.parse(JSON.stringify(r))); + // }); + // this.#connection.onRequest(CtxComputeQueryEmbedding, async (query) => { + // return await ctx.computeQueryEmbedding(query); + // }); + // this.#hasSetupCxt = true; + // } runTool(toolName: string, args: unknown, ctx: IContext): Promise { // Connect up context so sandbox can call application diff --git a/src/util.ts b/src/util.ts index befcad7..68ac2c2 100644 --- a/src/util.ts +++ b/src/util.ts @@ -1,13 +1,15 @@ -import type { Static, TSchema } from "@sinclair/typebox"; import { AssertError, Value } from "@sinclair/typebox/value"; import ora, { type Ora } from "ora"; import { brightBlue } from "@std/fmt/colors"; +import { FromSchema } from "./fromSchema.ts"; -export function loadConfigFromEnv( - schema?: T, +export function loadRawConfigFromEnv( + rawSchema?: unknown, envObj?: Record, -): Static | undefined { - if (!schema) return undefined; +) { + if (!rawSchema) return undefined; + // @ts-ignore functionally works but types are too complex + const schema = FromSchema(rawSchema); envObj ??= Deno.env.toObject(); return Value.Parse(schema, envObj); } @@ -34,7 +36,7 @@ export function getPrompt(): string | null { } // Possible sources where projects can be loaded from -export type ProjectSource = "local" | "ipfs"; +export type Source = "local" | "ipfs" | "remote"; export function PrettyTypeboxError( error: Error, @@ -54,3 +56,22 @@ export function PrettyTypeboxError( return error; } + +export function SpinnerLog( + messages: { start: string; success: string; fail: string }, +) { + // deno-lint-ignore no-explicit-any + return function (fn: any, _ctx: ClassMethodDecoratorContext) { + return async function (...args: unknown[]) { + const spinner = getSpinner().start(messages.start); + try { + const v = await fn.apply(this, ...args); + spinner.succeed(messages.success); + return v; + } catch (e) { + spinner.fail(messages.fail); + throw e; + } + }; + }; +} diff --git a/subquery-delegator/index.ts b/subquery-delegator/index.ts index 63633c7..38c0642 100644 --- a/subquery-delegator/index.ts +++ b/subquery-delegator/index.ts @@ -9,9 +9,9 @@ import { UnclaimedDelegatorRewards, } from "./tools.ts"; import { type Static, Type } from "npm:@sinclair/typebox"; -import type { IProjectEntrypoint } from "../src/project/project.ts"; +import type { Project, ProjectEntry } from "../src/project/project.ts"; -const ConfigType = Type.Object({ +export const ConfigType = Type.Object({ GRAPHQL_ENDPOINT: Type.String({ default: "https://gateway.subquery.network/query/QmcoJLxSeBnGwtmtNmWFCRusXVTGjYWCK1LoujthZ2NyGP", @@ -24,7 +24,7 @@ const ConfigType = Type.Object({ }), }); -type Config = Static; +export type Config = Static; const PROMPT = ` You are an agent designed to help a user with their token delegation on the SubQuery Network. @@ -37,11 +37,10 @@ All token amounts are in SQT. If the question seems to be unrelated to the API, just return "I don't know" as the answer. `; -export const entrypoint: IProjectEntrypoint = { - configType: ConfigType, - // deno-lint-ignore require-await - projectFactory: async (config: Config) => { - const tools = [ +// deno-lint-ignore require-await +const entrypoint: ProjectEntry = async (config: Config): Promise => { + return { + tools: [ new TotalDelegation(config.GRAPHQL_ENDPOINT), new DelegatedIndexers(config.GRAPHQL_ENDPOINT), new UnclaimedDelegatorRewards(config.GRAPHQL_ENDPOINT), @@ -52,23 +51,13 @@ export const entrypoint: IProjectEntrypoint = { config.BASE_SQT_ADDR, ), new SubqueryDocs(), - ]; - - return { - tools, - specVersion: "0.0.1", - model: "llama3.1", - vectorStorage: { - type: "lancedb", - path: "../.db", - }, - systemPrompt: PROMPT, - userMessage: - "Welcome to the SubQuery Delegator Agent! How can I help you today?", - }; - }, + ], + systemPrompt: PROMPT, + }; }; +export default entrypoint; + // Some example messages to ask with this set of tools const _messages = [ // Delegation From fc80732f501d03824e757c6234b731e9ec76c667 Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Tue, 15 Oct 2024 21:31:38 +1300 Subject: [PATCH 6/8] Fix tests, tidy up --- deno.lock | 31 ++++++++-- src/bundle_test.ts | 2 - src/loader_test.ts | 14 ++--- src/project/project.ts | 97 +----------------------------- src/project/project_test.ts | 53 ++++++++-------- src/sandbox/webWorker/webWorker.ts | 4 +- subquery-delegator/project.ts | 30 +++++++++ 7 files changed, 92 insertions(+), 139 deletions(-) create mode 100644 subquery-delegator/project.ts diff --git a/deno.lock b/deno.lock index 43fe86c..723d335 100644 --- a/deno.lock +++ b/deno.lock @@ -8,23 +8,26 @@ "jsr:@std/archive@*": "0.225.3", "jsr:@std/archive@~0.225.4": "0.225.4", "jsr:@std/assert@0.223": "0.223.0", - "jsr:@std/assert@^1.0.5": "1.0.5", + "jsr:@std/assert@^1.0.5": "1.0.6", + "jsr:@std/assert@^1.0.6": "1.0.6", "jsr:@std/assert@~0.213.1": "0.213.1", "jsr:@std/assert@~0.218.2": "0.218.2", "jsr:@std/bytes@^1.0.2": "1.0.2", "jsr:@std/bytes@~0.218.2": "0.218.2", "jsr:@std/dotenv@~0.225.2": "0.225.2", "jsr:@std/encoding@0.213": "0.213.1", - "jsr:@std/expect@*": "1.0.3", + "jsr:@std/expect@*": "1.0.5", + "jsr:@std/expect@^1.0.4": "1.0.5", "jsr:@std/fmt@^1.0.2": "1.0.2", "jsr:@std/fmt@~0.218.2": "0.218.2", "jsr:@std/fs@*": "1.0.3", "jsr:@std/fs@^1.0.3": "1.0.3", "jsr:@std/fs@~0.218.2": "0.218.2", - "jsr:@std/internal@^1.0.3": "1.0.3", + "jsr:@std/internal@^1.0.3": "1.0.4", + "jsr:@std/internal@^1.0.4": "1.0.4", "jsr:@std/io@*": "0.224.8", "jsr:@std/io@~0.218.2": "0.218.2", - "jsr:@std/io@~0.224.8": "0.224.8", + "jsr:@std/io@~0.224.8": "0.224.9", "jsr:@std/io@~0.224.9": "0.224.9", "jsr:@std/json@~0.213.1": "0.213.1", "jsr:@std/jsonc@0.213": "0.213.1", @@ -115,7 +118,13 @@ "@std/assert@1.0.5": { "integrity": "e37da8e4033490ce613eec4ac1d78dba1faf5b02a3f6c573a28f15365b9b440f", "dependencies": [ - "jsr:@std/internal" + "jsr:@std/internal@^1.0.3" + ] + }, + "@std/assert@1.0.6": { + "integrity": "1904c05806a25d94fe791d6d883b685c9e2dcd60e4f9fc30f4fc5cf010c72207", + "dependencies": [ + "jsr:@std/internal@^1.0.4" ] }, "@std/bytes@0.218.2": { @@ -134,7 +143,14 @@ "integrity": "d9cbd03323ef7feafd1e969ed85d5edb04ebbd9937b0fe7a52d5ff53be8e913a", "dependencies": [ "jsr:@std/assert@^1.0.5", - "jsr:@std/internal" + "jsr:@std/internal@^1.0.3" + ] + }, + "@std/expect@1.0.5": { + "integrity": "8c7ac797e2ffe57becc6399c0f2fd06230cb9ef124d45229c6e592c563824af1", + "dependencies": [ + "jsr:@std/assert@^1.0.6", + "jsr:@std/internal@^1.0.4" ] }, "@std/fmt@0.218.2": { @@ -155,6 +171,9 @@ "@std/internal@1.0.3": { "integrity": "208e9b94a3d5649bd880e9ca38b885ab7651ab5b5303a56ed25de4755fb7b11e" }, + "@std/internal@1.0.4": { + "integrity": "62e8e4911527e5e4f307741a795c0b0a9e6958d0b3790716ae71ce085f755422" + }, "@std/io@0.218.2": { "integrity": "c64fbfa087b7c9d4d386c5672f291f607d88cb7d44fc299c20c713e345f2785f", "dependencies": [ diff --git a/src/bundle_test.ts b/src/bundle_test.ts index 4241ec3..e96085c 100644 --- a/src/bundle_test.ts +++ b/src/bundle_test.ts @@ -1,6 +1,5 @@ import { generateBundle, publishProject } from "./bundle.ts"; import { expect } from "jsr:@std/expect"; -import { UnsafeSandbox } from "./sandbox/unsafeSandbox.ts"; import { IPFSClient } from "./ipfs.ts"; Deno.test("Generates a bundle", async () => { @@ -20,7 +19,6 @@ Deno.test("Publishing a project to ipfs", async () => { Authorization: `Bearer: ${Deno.env.get("SUBQL_ACCESS_TOKEN")}`, }, ), - UnsafeSandbox.create, ); // The example project could end up being modified so we only validate the response, not the content diff --git a/src/loader_test.ts b/src/loader_test.ts index 52c0fc3..2512288 100644 --- a/src/loader_test.ts +++ b/src/loader_test.ts @@ -1,5 +1,5 @@ import { expect } from "@std/expect/expect"; -import { getOSTempDir, loadVectorStoragePath } from "./loader.ts"; +import { getOSTempDir, pullContent } from "./loader.ts"; import { resolve } from "@std/path/resolve"; import { IPFSClient } from "./ipfs.ts"; import { tarDir } from "./bundle.ts"; @@ -13,23 +13,23 @@ const ipfs = new IPFSClient( ); Deno.test("Load vector storage from dir", async () => { - const dbPath = await loadVectorStoragePath("", "./.db", ipfs); + const dbPath = await pullContent("./.db", ipfs, ""); expect(dbPath).toBe(resolve("./.db")); }); Deno.test("Load vector storage from cloud storage", async () => { - const dbPath = await loadVectorStoragePath( - "", + const [dbPath] = await pullContent( "s3://my-bucket/lancedb", ipfs, + "", ); expect(dbPath).toBe("s3://my-bucket/lancedb"); }); Deno.test("Load vector storage from LanceDB cloud", async () => { - const dbPath = await loadVectorStoragePath("", "db://my_database", ipfs); + const [dbPath] = await pullContent("db://my_database", ipfs, ""); expect(dbPath).toBe("db://my_database"); }); @@ -42,10 +42,10 @@ Deno.test("Load vector storage from IPFS", async () => { ], } as unknown as IPFSClient; - const dbPath = await loadVectorStoragePath( - "", + const [dbPath] = await pullContent( "ipfs://QmbSzrfrgexP4Fugys356MYmWf3Wvk7kfEMaMNXrDXB2nd", mockIpfs, + "", ); expect(dbPath).toBe( diff --git a/src/project/project.ts b/src/project/project.ts index fd40d0c..0ffb87a 100644 --- a/src/project/project.ts +++ b/src/project/project.ts @@ -50,9 +50,10 @@ export type ProjectEntry = Static; export async function loadProject( manifest: ProjectManifest, - entry: ProjectEntry, + entry: unknown, config?: Record, ): Promise { + Value.Assert(ProjectEntry, entry); const cfg = loadRawConfigFromEnv(manifest.config, config); const project = await entry(cfg); @@ -60,97 +61,3 @@ export async function loadProject( return project; } - -// export const Project = Type.Object({ -// // Note: If a new spec version is introduced Type.TemplateLiteral could be used here -// specVersion: Type.Literal( -// "0.0.1", -// { description: "The specification version of the project structure." }, -// ), -// model: Type.String({ -// description: "The llm model to use", -// }), -// endpoints: Type.Optional(Type.Array(Type.String({ -// description: 'Endpoints the project would like to access', -// }))), -// embedModel: Type.Optional(Type.String({ -// description: "The model used to generate embeddings queries", -// })), -// systemPrompt: Type.String({ -// description: "The initial system prompt of the app", -// }), -// userMessage: Type.Optional(Type.String({ -// description: "An initial message to present to the user", -// })), -// tools: Type.Array(FunctionToolType), -// vectorStorage: Type.Optional(VectorConfig), -// }); - -// export type IFunctionTool = Static; -// export type IVectorConfig = Static; - -// type IProjectEntry = TUnion<[ -// TObject< -// { -// configType: TSchema; -// projectFactory: TFunction<[Config], TPromise>; -// } -// >, -// TObject< -// { -// configType: TUndefined; -// projectFactory: TFunction<[], TPromise>; -// } -// >, -// ]>; - -// export type IProject = Static; -// export type IProjectEntrypoint = Static< -// IProjectEntry -// >; - -// export function validateProject(project: unknown): project is IProject { -// Value.Assert(Project, project); -// return true; -// } - -// function validateProjectEntry(entry: unknown): entry is IProjectEntry { -// // deno-lint-ignore no-explicit-any -// const projectType = ProjectEntrypointGen((entry as any)?.configType); - -// Value.Assert(projectType, entry); -// return true; -// } - -// const ProjectEntrypointGen = (t: T) => -// Type.Union([ -// Type.Object({ -// configType: Type.Any(), -// projectFactory: Type.Function([t], Type.Promise(Project)), -// }), -// Type.Object({ -// // configType: Type.Undefined(), -// projectFactory: Type.Function([], Type.Promise(Project)), -// }), -// ]); - -// export async function getProjectFromEntrypoint( -// entrypoint: unknown, -// providedConfig?: Record, -// ): Promise { -// if (!entrypoint) { -// throw new Error("Project entry is invalid"); -// } -// // Validate the entrypoint -// if (validateProjectEntry(entrypoint)) { -// const config = loadConfigFromEnv(entrypoint.configType, providedConfig); - -// // Check that the constructed project is valid -// const project = await entrypoint.projectFactory(config); - -// if (validateProject(project)) { -// return project; -// } -// } -// throw new Error("Unable to validate project"); -// } diff --git a/src/project/project_test.ts b/src/project/project_test.ts index c01f6ae..2d65436 100644 --- a/src/project/project_test.ts +++ b/src/project/project_test.ts @@ -1,45 +1,44 @@ import { expect } from "jsr:@std/expect"; import { type Static, Type } from "@sinclair/typebox"; -import { getProjectFromEntrypoint } from "./project.ts"; +import { loadProject, type ProjectManifest } from "./project.ts"; Deno.test("loads a valid project WITH a config", async () => { const configType = Type.Object({ TEST_OPT: Type.String({ default: "test-opt" }), }); - await expect(getProjectFromEntrypoint({ - configType, - projectFactory: (config: Static) => + + await expect(loadProject( + { config: JSON.parse(JSON.stringify(configType)) } as ProjectManifest, + (config: Static) => Promise.resolve({ - model: "test-model", systemPrompt: "you are a test behave like a test:" + config.TEST_OPT, tools: [], }), - })).resolves.toEqual({ - model: "test-model", + )).resolves.toEqual({ systemPrompt: "you are a test behave like a test:test-opt", tools: [], }); }); Deno.test("loads a valid project WITHOUT a config", async () => { - await expect(getProjectFromEntrypoint({ - projectFactory: () => + await expect(loadProject( + {} as ProjectManifest, + () => Promise.resolve({ - model: "test-model", systemPrompt: "you are a test behave like a test", tools: [], }), - })).resolves.not.toThrow(); + )).resolves.not.toThrow(); }); Deno.test("handles an invalid entrypoint", async () => { - await expect(getProjectFromEntrypoint(null)).rejects.toThrow( + await expect(loadProject({} as ProjectManifest, null)).rejects.toThrow( "Project entry is invalid", ); - await expect(getProjectFromEntrypoint(undefined)).rejects.toThrow( + await expect(loadProject({} as ProjectManifest, undefined)).rejects.toThrow( "Project entry is invalid", ); - await expect(getProjectFromEntrypoint({})).rejects.toThrow( + await expect(loadProject({} as ProjectManifest, {})).rejects.toThrow( "Expected union value", ); // TODO return better error message }); @@ -48,29 +47,29 @@ Deno.test("handles an invalid conifg", async () => { const configType = Type.Object({ TEST_OPT: Type.String(), }); - await expect(getProjectFromEntrypoint({ - configType, - projectFactory: (config: Static) => + await expect(loadProject( + { config: configType } as ProjectManifest, + (config: Static) => Promise.resolve({ - model: "test-model", - systemPropmt: "you are a test behave like a test:" + config.TEST_OPT, + systemPrompt: "you are a test behave like a test:" + config.TEST_OPT, tools: [], }), - })).rejects.toThrow("Expected required property"); // TODO return better error message + )).rejects.toThrow("Expected required property"); // TODO return better error message }); Deno.test("handles errors calling projectFactory", async () => { - await expect(getProjectFromEntrypoint({ - projectFactory: () => - Promise.reject(new Error("Failed to create a project")), - })).rejects.toThrow("Failed to create a project"); + await expect(loadProject( + {} as ProjectManifest, + () => Promise.reject(new Error("Failed to create a project")), + )).rejects.toThrow("Failed to create a project"); }); Deno.test("handles an invalid project", async () => { - await expect(getProjectFromEntrypoint({ - projectFactory: () => + await expect(loadProject( + {} as ProjectManifest, + () => Promise.resolve({ foo: "bar", }), - })).rejects.toThrow("Expected required property"); // TODO return better error message + )).rejects.toThrow("Expected required property"); // TODO return better error message }); diff --git a/src/sandbox/webWorker/webWorker.ts b/src/sandbox/webWorker/webWorker.ts index fbdd3a8..f8af5de 100644 --- a/src/sandbox/webWorker/webWorker.ts +++ b/src/sandbox/webWorker/webWorker.ts @@ -12,7 +12,7 @@ import { Load, } from "./messages.ts"; -import type { Project, ProjectEntry } from "../../project/project.ts"; +import type { Project } from "../../project/project.ts"; import type { IContext } from "../../context/context.ts"; import { PrettyTypeboxError } from "../../util.ts"; import { loadProject } from "../../project/project.ts"; @@ -22,7 +22,7 @@ const conn = rpc.createMessageConnection( new BrowserMessageWriter(self), ); -let entrypoint: ProjectEntry; +let entrypoint: unknown; let project: Project; const context = { diff --git a/subquery-delegator/project.ts b/subquery-delegator/project.ts new file mode 100644 index 0000000..7ba70f7 --- /dev/null +++ b/subquery-delegator/project.ts @@ -0,0 +1,30 @@ +import { type Config, ConfigType } from "./index.ts"; +import type { ProjectManifest } from "../src/project/project.ts"; +import { Value } from "@sinclair/typebox/value"; + +const defaultConfig = Value.Default(ConfigType, {} as Config) as Config; + +const endpoints = Object.values(defaultConfig) + .filter((v) => typeof v === "string") + .map((v) => { + try { + return new URL(v).hostname; + } catch (_e) { + return undefined; + } + }) + .filter((v) => !!v) as string[]; // Cast should be unnecessary with latest TS versions + +const project: ProjectManifest = { + specVersion: "0.0.1", + endpoints: [...new Set(endpoints)], + vectorStorage: { + type: "lancedb", + path: "../.db", + }, + config: JSON.parse(JSON.stringify(ConfigType)), // Convert to JSON Schema + model: "llama3.1", + entry: "./index.ts", +}; + +export default project; From f42b21d37e6278d7f26f458968fc122c109cfb4e Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Wed, 16 Oct 2024 10:42:31 +1300 Subject: [PATCH 7/8] Get tests passing --- src/bundle_test.ts | 2 +- src/loader_test.ts | 12 ++++++++---- src/project/project.ts | 7 ++++++- src/project/project_test.ts | 4 ++-- src/util.ts | 1 + 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/bundle_test.ts b/src/bundle_test.ts index e96085c..d1a65b8 100644 --- a/src/bundle_test.ts +++ b/src/bundle_test.ts @@ -11,7 +11,7 @@ Deno.test("Generates a bundle", async () => { Deno.test("Publishing a project to ipfs", async () => { // WebWorkers don't work in tests, use the unsafe sandbox instead const cid = await publishProject( - "./subquery-delegator/index.ts", + "./subquery-delegator/project.ts", new IPFSClient( Deno.env.get("IPFS_ENDPOINT") ?? "https://unauthipfs.subquery.network/ipfs/api/v0", diff --git a/src/loader_test.ts b/src/loader_test.ts index 2512288..a2209ec 100644 --- a/src/loader_test.ts +++ b/src/loader_test.ts @@ -13,25 +13,28 @@ const ipfs = new IPFSClient( ); Deno.test("Load vector storage from dir", async () => { - const dbPath = await pullContent("./.db", ipfs, ""); + const [dbPath, source] = await pullContent("./.db", ipfs, ""); expect(dbPath).toBe(resolve("./.db")); + expect(source).toBe("local"); }); Deno.test("Load vector storage from cloud storage", async () => { - const [dbPath] = await pullContent( + const [dbPath, source] = await pullContent( "s3://my-bucket/lancedb", ipfs, "", ); expect(dbPath).toBe("s3://my-bucket/lancedb"); + expect(source).toBe("remote"); }); Deno.test("Load vector storage from LanceDB cloud", async () => { - const [dbPath] = await pullContent("db://my_database", ipfs, ""); + const [dbPath, source] = await pullContent("db://my_database", ipfs, ""); expect(dbPath).toBe("db://my_database"); + expect(source).toBe("remote"); }); Deno.test("Load vector storage from IPFS", async () => { @@ -42,7 +45,7 @@ Deno.test("Load vector storage from IPFS", async () => { ], } as unknown as IPFSClient; - const [dbPath] = await pullContent( + const [dbPath, source] = await pullContent( "ipfs://QmbSzrfrgexP4Fugys356MYmWf3Wvk7kfEMaMNXrDXB2nd", mockIpfs, "", @@ -51,6 +54,7 @@ Deno.test("Load vector storage from IPFS", async () => { expect(dbPath).toBe( resolve(getOSTempDir(), "QmbSzrfrgexP4Fugys356MYmWf3Wvk7kfEMaMNXrDXB2nd"), ); + expect(source).toBe("ipfs"); // Clean up await Deno.remove(dbPath, { recursive: true }); diff --git a/src/project/project.ts b/src/project/project.ts index 0ffb87a..290f6c4 100644 --- a/src/project/project.ts +++ b/src/project/project.ts @@ -53,7 +53,12 @@ export async function loadProject( entry: unknown, config?: Record, ): Promise { - Value.Assert(ProjectEntry, entry); + try { + Value.Assert(ProjectEntry, entry); + } catch (e) { + throw new Error("Project entry is invalid", { cause: e }); + } + const cfg = loadRawConfigFromEnv(manifest.config, config); const project = await entry(cfg); diff --git a/src/project/project_test.ts b/src/project/project_test.ts index 2d65436..4df0840 100644 --- a/src/project/project_test.ts +++ b/src/project/project_test.ts @@ -39,8 +39,8 @@ Deno.test("handles an invalid entrypoint", async () => { "Project entry is invalid", ); await expect(loadProject({} as ProjectManifest, {})).rejects.toThrow( - "Expected union value", - ); // TODO return better error message + "Project entry is invalid", + ); }); Deno.test("handles an invalid conifg", async () => { diff --git a/src/util.ts b/src/util.ts index 68ac2c2..eafeb78 100644 --- a/src/util.ts +++ b/src/util.ts @@ -65,6 +65,7 @@ export function SpinnerLog( return async function (...args: unknown[]) { const spinner = getSpinner().start(messages.start); try { + // @ts-ignore need to apply this function call but unable to type "this" const v = await fn.apply(this, ...args); spinner.succeed(messages.success); return v; From b751034eb426e08b2e1d1d2bf1224f689841adde Mon Sep 17 00:00:00 2001 From: Scott Twiname Date: Wed, 16 Oct 2024 11:19:13 +1300 Subject: [PATCH 8/8] Tidy up --- src/decorators.ts | 58 +++++++++++++++++++++++ src/index.ts | 2 +- src/loader.ts | 40 +++++++--------- src/sandbox/webWorker/webWorkerSandbox.ts | 37 +++++++-------- src/util.ts | 31 ++++++------ subquery-delegator/project.ts | 14 +----- 6 files changed, 108 insertions(+), 74 deletions(-) create mode 100644 src/decorators.ts diff --git a/src/decorators.ts b/src/decorators.ts new file mode 100644 index 0000000..5ba92ea --- /dev/null +++ b/src/decorators.ts @@ -0,0 +1,58 @@ +import { getSpinner } from "./util.ts"; + +/** Creates a logging spinner using Ora for progress on a function */ +export function SpinnerLog( + messages: { start: string; success: string; fail: string }, +) { + // deno-lint-ignore no-explicit-any + return function (fn: any, _ctx: ClassMethodDecoratorContext) { + return function (...args: unknown[]) { + const spinner = getSpinner().start(messages.start); + try { + // @ts-ignore need to apply this function call but unable to type "this" + const v = fn.apply(this, ...args); + + if (v instanceof Promise) { + return v.then((r) => { + spinner.succeed(messages.success); + return r; + }); + } + spinner.succeed(messages.success); + return v; + } catch (e) { + spinner.fail(messages.fail); + throw e; + } + }; + }; +} + +export function Memoize() { + const cache = new Map(); + + // deno-lint-ignore no-explicit-any + return function (fn: any, _ctx: ClassMethodDecoratorContext) { + return function (...args: unknown[]) { + const key = JSON.stringify(args); + + if (cache.has(key)) { + return cache.get(key); + } + + // @ts-ignore need to apply this function call but unable to type "this" + const result = fn.apply(this, args); + + // If the method is async, wait for the promise to resolve + if (result instanceof Promise) { + return result.then((resolvedResult) => { + cache.set(key, resolvedResult); + return resolvedResult; + }); + } + + cache.set(key, result); + return result; + }; + }; +} diff --git a/src/index.ts b/src/index.ts index 14da932..8987793 100755 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -#!/usr/bin/env -S deno run --allow-env --allow-net --allow-sys --allow-read --allow-write --allow-ffi --allow-run --unstable-worker-options +#!/usr/bin/env -S deno run --allow-env --allow-net --allow-sys --allow-read --allow-write --allow-ffi --allow-run --unstable-worker-options --no-prompt // TODO limit --allow-ffi to just lancedb // TODO limit --deny-net on localhost except ollama/db // TODO limit --allow-run needed for Deno.exit diff --git a/src/loader.ts b/src/loader.ts index 30a07d1..7f84f2d 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -3,9 +3,10 @@ import { CIDReg, type IPFSClient } from "./ipfs.ts"; import { resolve } from "@std/path/resolve"; import { UntarStream } from "@std/tar"; import { ensureDir, exists } from "@std/fs"; -import { getSpinner, type Source, SpinnerLog } from "./util.ts"; +import type { Source } from "./util.ts"; import { ProjectManifest } from "./project/project.ts"; import { Value } from "@sinclair/typebox/value"; +import { Memoize, SpinnerLog } from "./decorators.ts"; export const getOSTempDir = () => Deno.env.get("TMPDIR") || Deno.env.get("TMP") || Deno.env.get("TEMP") || @@ -40,10 +41,10 @@ export async function loadManfiest(path: string): Promise { return manifest; } -// TODO support tar /** * @param path The content path or cid * @param ipfs The IPFS client to fetch content if from IPFS + * @param fileName The name to save the file under, if using .gz exension it will unarchive * @param tmpDir (optional) The location to cache content, defaults to the OS temp directory * @param force (optional) If true and the content is from IPFS it will check if its already been fetched * @param workingPath (optional) If the content is local it will resolve the path relative to this @@ -110,8 +111,6 @@ export class Loader { #ipfs: IPFSClient; #force: boolean; - #manifest?: [manifestPath: string, ProjectManifest, Source]; - constructor( readonly projectPath: string, ipfs: IPFSClient, @@ -138,26 +137,20 @@ export class Loader { ); } - // @SpinnerLog({ start: "Loading project manifest", success: "Loaded project manifest", fail: "Failed to load project manfiest"}) + @Memoize() + @SpinnerLog({ + start: "Loading project manifest", + success: "Loaded project manifest", + fail: "Failed to load project manfiest", + }) async getManifest(): Promise<[string, ProjectManifest, Source]> { - if (!this.#manifest) { - const spinner = getSpinner().start("Loading project manifest"); - try { - const [manifestPath, source] = await this.pullContent( - this.projectPath, - "manifest.json", - ); - - const manifest = await loadManfiest(manifestPath); - - this.#manifest = [manifestPath, manifest, source]; - spinner.succeed("Loaded project manifest"); - } catch (e) { - spinner.fail("Failed to load project manifest"); - throw e; - } - } - return this.#manifest; + const [manifestPath, source] = await this.pullContent( + this.projectPath, + "manifest.json", + ); + + const manifest = await loadManfiest(manifestPath); + return [manifestPath, manifest, source]; } @SpinnerLog({ @@ -187,7 +180,6 @@ export class Loader { return undefined; } - // TODO resovle local paths const res = await this.pullContent( manifest.vectorStorage.path, "db.gz", diff --git a/src/sandbox/webWorker/webWorkerSandbox.ts b/src/sandbox/webWorker/webWorkerSandbox.ts index 0f02f9c..793ce70 100644 --- a/src/sandbox/webWorker/webWorkerSandbox.ts +++ b/src/sandbox/webWorker/webWorkerSandbox.ts @@ -12,7 +12,11 @@ import { Init, Load, } from "./messages.ts"; -import { loadRawConfigFromEnv, type Source } from "../../util.ts"; +import { + extractConfigHostNames, + loadRawConfigFromEnv, + type Source, +} from "../../util.ts"; import type { IContext } from "../../context/context.ts"; import type { ProjectManifest } from "../../project/project.ts"; import type { Loader } from "../../loader.ts"; @@ -62,9 +66,18 @@ export class WebWorkerSandbox implements ISandbox { loader: Loader, ): Promise { const [manifestPath, manifest, source] = await loader.getManifest(); + const config = loadRawConfigFromEnv(manifest.config); const permissions = getPermisionsForSource(source, manifestPath); + // Add any project host names as well as any configured host names + const hostnames = [ + ...new Set( + ...(manifest.endpoints ?? []), + ...extractConfigHostNames(config as Record), + ), + ]; + const w = new Worker( import.meta.resolve("./webWorker.ts"), { @@ -72,8 +85,8 @@ export class WebWorkerSandbox implements ISandbox { deno: { permissions: { ...permissions, - env: true, // Should be passed through in loadConfigFromEnv below - net: manifest.endpoints, // TODO add config endpoints + env: false, // Should be passed through in loadRawConfigFromEnv + net: hostnames, run: false, write: false, }, @@ -92,7 +105,6 @@ export class WebWorkerSandbox implements ISandbox { const [entryPath] = await loader.getProject(); await conn.sendRequest(Load, entryPath); - const config = loadRawConfigFromEnv(manifest.config); const { tools, systemPrompt } = await conn.sendRequest( Init, manifest, @@ -122,23 +134,6 @@ export class WebWorkerSandbox implements ISandbox { return this.#tools; } - // #hasSetupCxt = false; - // private setupCtxMethods(ctx: IContext) { - // if (this.#hasSetupCxt) return; - // // Connect up context so sandbox can call application - // this.#connection.onRequest(CtxVectorSearch, async (tableName, vector) => { - // const res = await ctx.vectorSearch(tableName, vector); - - // // lancedb returns classes (Apache Arrow - Struct Row). It needs to be made serializable - // // This is done here as its specific to the webworker sandbox - // return res.map((r) => JSON.parse(JSON.stringify(r))); - // }); - // this.#connection.onRequest(CtxComputeQueryEmbedding, async (query) => { - // return await ctx.computeQueryEmbedding(query); - // }); - // this.#hasSetupCxt = true; - // } - runTool(toolName: string, args: unknown, ctx: IContext): Promise { // Connect up context so sandbox can call application this.#connection.onRequest(CtxVectorSearch, async (tableName, vector) => { diff --git a/src/util.ts b/src/util.ts index eafeb78..f79fa05 100644 --- a/src/util.ts +++ b/src/util.ts @@ -57,22 +57,21 @@ export function PrettyTypeboxError( return error; } -export function SpinnerLog( - messages: { start: string; success: string; fail: string }, -) { - // deno-lint-ignore no-explicit-any - return function (fn: any, _ctx: ClassMethodDecoratorContext) { - return async function (...args: unknown[]) { - const spinner = getSpinner().start(messages.start); +/** Gets the host names of any urls in a record */ +export function extractConfigHostNames( + config: Record, +): string[] { + const hosts = Object.values(config) + .filter((v) => typeof v === "string") + .map((v) => { try { - // @ts-ignore need to apply this function call but unable to type "this" - const v = await fn.apply(this, ...args); - spinner.succeed(messages.success); - return v; - } catch (e) { - spinner.fail(messages.fail); - throw e; + return new URL(v).hostname; + } catch (_e) { + return undefined; } - }; - }; + }) + .filter((v) => !!v) as string[]; // Cast should be unnecessary with latest TS versions + + // Make unique + return [...new Set(hosts)]; } diff --git a/subquery-delegator/project.ts b/subquery-delegator/project.ts index 7ba70f7..29238c5 100644 --- a/subquery-delegator/project.ts +++ b/subquery-delegator/project.ts @@ -1,23 +1,13 @@ import { type Config, ConfigType } from "./index.ts"; import type { ProjectManifest } from "../src/project/project.ts"; import { Value } from "@sinclair/typebox/value"; +import { extractConfigHostNames } from "../src/util.ts"; const defaultConfig = Value.Default(ConfigType, {} as Config) as Config; -const endpoints = Object.values(defaultConfig) - .filter((v) => typeof v === "string") - .map((v) => { - try { - return new URL(v).hostname; - } catch (_e) { - return undefined; - } - }) - .filter((v) => !!v) as string[]; // Cast should be unnecessary with latest TS versions - const project: ProjectManifest = { specVersion: "0.0.1", - endpoints: [...new Set(endpoints)], + endpoints: extractConfigHostNames(defaultConfig), vectorStorage: { type: "lancedb", path: "../.db",