diff --git a/Dockerfile b/Dockerfile index 2b4364ac..942a706f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,6 +35,10 @@ RUN mkdir .sophia # Generate the function schemas RUN npm run functionSchemas +# Needed to avoid the error "fatal: detected dubious ownership in repository at '/home/sophia'" when running git commands +# as the application files are owned by the root user so an agent (which runs as the sophia user) can't modify them. +RUN git config --global --add safe.directory /home/sophia + ENV NODE_ENV=production ENV PORT=8080 EXPOSE 8080 diff --git a/bin/configure b/bin/configure index 19e2ec06..eedc6f2c 100755 --- a/bin/configure +++ b/bin/configure @@ -28,4 +28,26 @@ fi echo Initialising Angular project cd frontend npm install -cd .. \ No newline at end of file +cd .. + +# CLI setup ------------- + +# Setup for bash +if [ -f ~/.bashrc ]; then + if ! grep -q "SOPHIA_HOME" ~/.bashrc; then + echo "\n# Sophia CLI environment" >> ~/.bashrc + echo "export SOPHIA_HOME=$(pwd)" >> ~/.bashrc + echo "export PATH=\$SOPHIA_HOME/bin/path:\$PATH" >> ~/.bashrc + fi +fi + +# Setup for zsh +if [ -f ~/.zshrc ]; then + if ! grep -q "SOPHIA_HOME" ~/.zshrc; then + echo "\n# Sophia CLI environment" >> ~/.zshrc + echo "export SOPHIA_HOME=$(pwd)" >> ~/.zshrc + echo "export PATH=\$SOPHIA_HOME/bin/path:\$PATH" >> ~/.zshrc + fi +fi + +echo "done" diff --git a/bin/path/ss b/bin/path/ss old mode 100644 new mode 100755 index 4ec6a364..b0e50db2 --- a/bin/path/ss +++ b/bin/path/ss @@ -19,5 +19,15 @@ script=$1 shift # Shift the arguments so $@ contains the remaining args CWD=$(pwd) + +# Load NVM +export NVM_DIR="$HOME/.nvm" +[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" + +set -x # Run the npm script with any additional arguments and the filesystem arg -(cd "$SOPHIA_HOME" && npm run "$script" -- --fs="${CWD}" "$@") +( + cd "$SOPHIA_HOME" + nvm use + npm run "$script" -- --fs="${CWD}" "$@" +) diff --git a/docs/docs/agent-concepts.md b/docs/docs/agent-concepts.md index fa57ff28..2cd07d79 100644 --- a/docs/docs/agent-concepts.md +++ b/docs/docs/agent-concepts.md @@ -2,6 +2,14 @@ ## Agent categories +We follow a similar naming convention described in [Building Effective Agents](https://www.anthropic.com/research/building-effective-agents) by Anthropic. + +> "Agent" can be defined in several ways. Some customers define agents as fully autonomous systems that operate independently over extended periods, using various tools to accomplish complex tasks. Others use the term to describe more prescriptive implementations that follow predefined workflows. At Anthropic, we categorize all these variations as agentic systems, but draw an important architectural distinction between workflows and agents: +> +> Workflows are systems where LLMs and tools are orchestrated through predefined code paths. +> +> Agents, on the other hand, are systems where LLMs dynamically direct their own processes and tool usage, maintaining control over how they accomplish tasks. + ### 1. Autonomous agents Sophia comes with two autonomous agent types (XML and CodeGen), which applying reasoning to break down @@ -9,17 +17,17 @@ a user request into a plan to be completed by the available function calls. The Slack chatbot uses an autonomous agent to provide a response to a user. -Function calls may be to API integrations or create sub-agents. +Functions may call to API integrations or create sub-agents. ### 2. 
Workflow agents Workflow agents have the control flow logic defined in code, and the results of the LLM calls -may determine the conditional control flow through the workflow. This includes the Software Developer/Code Editing agents. +may determine the conditional control flow through the workflow. This includes the Software Developer/Code Editing workflow agents. ## Agent context The codebase makes use of `AsyncLocalStorage`, which is similar to `ThreadLocal` in Java and `threading.local()` in Python, -to provide easy lookup of agent state, current user, tool configuration, and default LLMs. +to provide easy lookup of agent state, current user, tool configuration, and default LLMs for both autonomous agents and workflow agents. This requires the agent code to run within a AsyncLocalStorage context. ```typescript diff --git a/docs/docs/autonomous-agents.md b/docs/docs/autonomous-agents.md index b26faabf..0a796782 100644 --- a/docs/docs/autonomous-agents.md +++ b/docs/docs/autonomous-agents.md @@ -1,5 +1,12 @@ # Autonomous AI Agents +- Reasoning/planning inspired from Google's [Self-Discover](https://arxiv.org/abs/2402.03620) and other papers +- Memory and function call history for complex workflows +- Iterative planning with hierarchical task decomposition +- Sandboxed execution of generated code for multi-step function calling and logic +- LLM function schemas auto-generated from source code +- Human-in-the-loop for budget control, agent initiated questions and error handling + Sophia provides two autonomous agents which work to complete the request via a control loop which iteratively (re-)plans and calls the functions available to the agent. At a high level they share the same internal state, agent memory, human-in-the loop, functional calling history etc. diff --git a/docs/docs/chat.md b/docs/docs/chat.md index ec1f6ccb..0ef59309 100644 --- a/docs/docs/chat.md +++ b/docs/docs/chat.md @@ -1,7 +1,29 @@ -# Chat +# AI Chat -A basic chat interface like chatgpt.com or claude.ai is provided where you can select the LLM model (or multi-agent model) used for each message generation. +Sophia provides a chat interface like chatgpt.com or claude.ai. -Attachments and more features are on the roadmap. +## LLM selection + +The LLM model selection can be changed over a conversation. + +The model selection also allows selecting the composite implementations of the LLM interface such as multi-agent debate/review implementations or +fallbacks across multiple. + +## Attachments + +Images and PDF files can be attached to a message. However, it is required that the LLM selected supports all the file/image types +in the new and previous messages, otherwise an error will occur. + +## Keyboard shortcuts + +- **Ctrl - M**: Open the LLM model selection +- **Ctrl - A**: Add attachment +- **Ctrl - I**: Open/close the chat info/settings panel +- **Ctrl - E**: Toggle enter sends the message or adds a new line + + +## Screenshots ![Chats](https://public.trafficguard.ai/sophia/chat.png) diff --git a/docs/docs/cli.md b/docs/docs/cli.md index 6e1282d4..f8244998 100644 --- a/docs/docs/cli.md +++ b/docs/docs/cli.md @@ -5,6 +5,8 @@ There are two main ways to interact with the system: ## Running the server & UI +Run the following commands from the sophia git repo root directory. 
+ ### Local install In one terminal run ```bash @@ -22,16 +24,39 @@ Run `docker compose up` The UI will be available at [http://localhost:4200](http://localhost:4200) -## CLI scripts +## CLI commands + +## Running in Docker + +To run the CLI scripts when using the Docker container, run the script `./bin/container` from the repo root to first open a bash shell inside the Sophia development container. + +### Running outside the repository + +To run Sophia agents/workflows via the CLI script described below, in a folder outside the sophia repository, the script at `bin/path/ss` allows you to invoke the Sophia package.json scripts from any directory. + +The `bin/configure` script will update your shell configuration files to include it in your PATH. + +Either run the `bin/configure` script from the Sophia repository root or copy and run the required section from the script. This will add to your .bashrc and .zshrc files (if they exist) the output of: + +```bash +export SOPHIA_HOME=$(pwd) +export PATH=$SOPHIA_HOME/bin/path:$PATH +``` + +Then from any folder you can run commands like: -To run the CLI scripts when using the Docker container, run the script `./bin/container` to open a bash shell inside the Sophia development container. +`ss query what test frameworks does this repository use` +Where *query* is the Sophia package.json script. For all the examples in the CLI scripts section above you can replace `npm run` with `ss` + + +### CLI scripts There are a number of convenience scripts in the package.json for running agents and other scripts such as benchmarks, where the entrypoint file matches `/src/cli/.ts` ### agent -`npm run agent` will run the autonomous agent configured in `src/cli/agent.ts` +`npm run agent` or `ss agent` will run the autonomous agent configured in `src/cli/agent.ts`. Note that the agent will have the functions available configured in the `agent.ts` file. If no arguments are supplied the user prompt will be read from `src/cli/agent-in` @@ -44,7 +69,7 @@ npm run agent research the latest news about large language models and write a r ### code -`npm run code` runs the CodeEditingAgent configured in `src/cli/code.ts` +`npm run code` or `ss code` runs the [CodeEditingAgent](/software-engineer/) configured in `src/cli/code.ts` Without arguments the prompt is read from `src/cli/code-in` otherwise it uses the provided arguments for the prompt. @@ -54,12 +79,22 @@ This is a useful for editing the sophia codebase. You could run a command like: npm run code In the anthropic vertex class update the pricing for claude 3.5 sonnet to be 3 dollars per million input tokens and 15 dollars per million output tokens ``` -When editing other local repositories you will need to provide the initial arg `-fs=` to set the agent's virtual filesystem working -directory to the repository you want to edit. +When editing other repositories you will need use the `ss` command to run the agent with its virtual filesystem working +directory set to the current shell directory. + +### index + +`npm run index` or `ss index` + +This runs the agent which indexes a repository, and stores the summary index docs under `.sophia/docs` + +### slack + +`npm run slack` or `ss slack` starts the Slack chatbot. 
The chatbot will have the functions available defined in `src/modules/slack/slackChatBotService.ts` ### swe -`npm run swe` runs the SoftwareDeveloperAgent configured in `src/cli/swe.ts` +`npm run swe` or `ss swe` runs the SoftwareDeveloperAgent configured in `src/cli/swe.ts` Without arguments the prompt is read from `src/cli/swe-in` otherwise it uses the provided arguments for the prompt. @@ -69,7 +104,7 @@ This agent can be used for process automation and handling requests within the l ### gen -`npm run gen` runs the script at `src/cli/gen.ts` +`npm run gen` or `ss gen` runs the script at `src/cli/gen.ts` This simply generates text from a prompt. As with the other scripts you can provide arguments for a quick prompt. Otherwise, prepare the prompt in `src/cli/gen-in` and don't provide any other arguments. @@ -90,7 +125,7 @@ Make sure the directory you save the files to is in the .gitignore. ### scrape -`npm run scrape ` runs the PublicWeb.getWebPage function which uses a headless browser to scrape a web page, and then converts +`npm run scrape ` or `ss scrape ` runs the PublicWeb.getWebPage function which uses a headless browser to scrape a web page, and then converts it to a slim format by using the `@mozilla/readability` module to first extract the main contents of the page, and then the `turndown` package to convert the HTML to Markdown, further reducing the token count. @@ -98,11 +133,9 @@ By default, it writes the output to `scrape.md`. Alternatively you can provide a ### query -`npm run query ` runs the codebase query agent at *src/swe/discovery/codebaseQuery.ts* which can answer ad hoc +`npm run query ` or `ss query ` runs the codebase query agent at *src/swe/discovery/fileSelectionAgent.ts* which can answer ad hoc questions about a codebase/folder contents. - - ## Development ### Running tests @@ -116,25 +149,6 @@ npm run test ``` - - -## CLI usage optimizations - -### Helper CLI script - -To run Sophia agents in other folders and repositories, the script at `bin/path/ss` allows you to invoke the Sophia package.json scripts from any directory. - -To use this you in your shell config files (e.g. ~/.bashrc, ~/.zshrc) - -- Set the `SOPHIA_HOME` variable to the path of the Sophia repository. -- Add `$SOPHIA_HOME/bin/path` to the `PATH` variable. - -Then from any folder you can run commands like: - -`ss query what test frameworks does this repository use` - -Where *query* is the Sophia package.json script. For all the examples in the CLI scripts section above you can replace `npm run` with `ss` - ### Speech-to-text Speech-to-text is useful writing longer prompts with additional details to guide the agents. diff --git a/docs/docs/index.md b/docs/docs/index.md index 3ab4d011..930a6cc4 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -4,63 +4,36 @@ Sophia banner

- The open TypeScript platform for AI agents and LLM based workflows
+ The open TypeScript platform for AI agents, workflows & chat
The Ancient Greek word sophía (σοφία) variously translates to "clever, skillful, intelligent, wise"

-Sophia is a full-featured platform for developing and running agents, LLM based workflows and chatbots. +Sophia is a full-featured platform for developing and running autonomous agents, LLM based workflows, Slack chatbots, AI chat and more. Included are capable software engineering agents, which have assisted building the platform. -## Key features +## High level features - [Advanced Autonomous agents](https://sophia.dev/autonomous-agents) + - Faster/cheaper actions by generated function calling code (with sandboxed execution) + - Complex tasks supported with memory, function call history, live files, file store etc. + - Cost management with configurable Human-in-the-loop settings and cost tracking + - Persistent state management. Restart from completion/error/human-in-loop - [Software developer agents](https://sophia.dev/software-engineer/) + - Local repository editing + - Ticket-to-pull request workflow + - Repository indexing and ad-hoc query agents + - Leverages [Aider](https://aider.chat/) for diff editing - [Pull request code review agent](https://sophia.dev/code-review/) - [AI chat interface](https://sophia.dev/chat/) - [Slack chatbot](https://sophia.dev/chatbot/) -- Supports many LLM services - OpenAI, Anthropic (native & Vertex), Gemini, Groq, Fireworks, Together.ai, DeepSeek, Ollama, Cerebras, X.ai +- Supports many LLM services - OpenAI, Anthropic (native & Vertex), Gemini, Groq, Fireworks, Together.ai, DeepSeek, Ollama, Cerebras, X.ai and more. +- Simple LLM interface wrapping the [Vercel ai](https://sdk.vercel.ai/) package to add tracing and cost tracking. - Multi-agent [extend-reasoning implementations](https://github.com/TrafficGuard/sophia/tree/main/src/llm/multi-agent) of the LLM interface -- Configurable Human-in-the-loop settings - Functional callable tools (Filesystem, Jira, Slack, Perplexity, Google Cloud, Gitlab, GitHub etc) -- CLI and Web UI interface -- Run locally or deployed on the cloud with multi-user/SSO - OpenTelemetry based observability - Leverages the extensive Python AI ecosystem through executing Python scripts/packages -## Autonomous agents - -- Reasoning/planning inspired from Google's [Self-Discover](https://arxiv.org/abs/2402.03620) and other papers -- Memory and function call history for complex workflows -- Iterative planning with hierarchical task decomposition -- Sandboxed execution of generated code for multi-step function calling and logic -- LLM function schemas auto-generated from source code -- Human-in-the-loop for budget control, agent initiated questions and error handling - -Full details at the [Autonomous agent docs](https://sophia.dev/autonomous-agents) - -## Software developer agents - -- Code Editing Agent for local repositories - - Auto-detection of project initialization, compile, test and lint - - Task file selection agent selects the relevant files - - Design agent creates the implementation plan. - - Code editing loop with compile, lint, test, fix (editing delegates to [Aider](https://aider.chat/)) - - Compile error analyser can search online, add additional files and packages - - Final review of the changes with an additional code editing loop if required. 
-- Software Engineer Agent (For ticket to Pull Request workflow): - - Find the appropriate repository from GitLab/GitHub - - Clone and create branch - - Call the Code Editing Agent - - Create merge request -- Code Review agent: - - Configurable code review guidelines - - Posts comments on GitLab merge requests at the appropriate line with suggested changes -- Repository ad hoc query agent -- Codebase awareness - optional index creation used by the task file selection agent - -Full details at the [Software developer agents](https://sophia.dev/software-engineer/) docs. - ## Flexible run/deploy options - Run from the repository or the provided Dockerfile in single user mode. diff --git a/docs/docs/software-engineer.md b/docs/docs/software-engineer.md index ccd6d867..64ef8258 100644 --- a/docs/docs/software-engineer.md +++ b/docs/docs/software-engineer.md @@ -1,6 +1,6 @@ # AI Coding Agents -The Sophia software/coding agents build upon the project [Aider](https://aider.chat/), providing additional layers around it for more autonomous use cases. +The Sophia software/coding agents build upon the project [Aider](https://aider.chat/), providing additional agents around it for quality and automation. ## Code Editing Agent diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 266f6030..dffe6f39 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -39,18 +39,20 @@ nav: - 'Setup': setup.md - 'CLI': cli.md - 'Environment variables': environment-variables.md - - 'Observability': observability.md - - Concepts: - - functions.md - - agent-concepts.md + - 'Observability / Tracing': observability.md + - 'LLMs': llms.md - Agents: + - agent-concepts.md - autonomous-agents.md - software-engineer.md - code-review.md - - chatbot.md - examples.md - - integrations.md - - chat.md + - Function Calling: + - functions.md + - integrations.md + - Chat: + - chat.md + - chatbot.md - roadmap.md - Blog: - blog/index.md diff --git a/frontend/package-lock.json b/frontend/package-lock.json index f5e60595..e162c7e7 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1,6 +1,6 @@ { "name": "@sophia/ui", - "version": "0.5.0", + "version": "0.6.0", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/frontend/package.json b/frontend/package.json index e805bffd..0f12c7db 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "@sophia/ui", - "version": "0.5.0", + "version": "0.6.0", "description": "Sophia AI platform", "author": "https://themeforest.net/user/srcn, Daniel Campagnoli, TrafficGuard Pty Ltd, and contributors", "license": "https://themeforest.net/licenses/standard", diff --git a/frontend/src/app/app.routes.ts b/frontend/src/app/app.routes.ts index 6b05291a..24712c53 100644 --- a/frontend/src/app/app.routes.ts +++ b/frontend/src/app/app.routes.ts @@ -79,7 +79,7 @@ export const appRoutes: Route[] = [ {path: 'chat', loadChildren: () => import('app/modules/chat/chat.routes')}, {path: 'agents', loadChildren: () => import('app/modules/agents/agent.routes')}, {path: 'code-reviews', loadChildren: () => import('app/modules/code-review/code-review.routes')}, - {path: 'actions', loadChildren: () => import('app/modules/actions/actions.routes')}, + {path: 'workflows', loadChildren: () => import('app/modules/workflows/workflows.routes')}, ] } ]; diff --git a/frontend/src/app/mock-api/common/navigation/data.ts b/frontend/src/app/mock-api/common/navigation/data.ts index 749a207c..2e6d0b8b 100644 --- a/frontend/src/app/mock-api/common/navigation/data.ts +++ 
b/frontend/src/app/mock-api/common/navigation/data.ts @@ -31,11 +31,11 @@ export const defaultNavigation: FuseNavigationItem[] = [ link: '/ui/agents/new', }, { - id: 'actions', - title: 'Actions', + id: 'workflows', + title: 'Workflows', type: 'basic', icon: 'heroicons_outline:server-stack', - link: '/ui/actions', + link: '/ui/workflows', }, { id: 'codereviews', diff --git a/frontend/src/app/modules/agents/agent-list/agent-list.component.html b/frontend/src/app/modules/agents/agent-list/agent-list.component.html index d2e257d4..a2ce6cc7 100644 --- a/frontend/src/app/modules/agents/agent-list/agent-list.component.html +++ b/frontend/src/app/modules/agents/agent-list/agent-list.component.html @@ -44,19 +44,27 @@
@if (agents$ | async; as agents) { - @if (agents.length > 0) { -
- -
-
-
Name
-
State
- -
User Prompt
- - -
+
+ +
+
+
Name
+
State
+ +
User Prompt
+ + +
+ + @if (agents === null) { +
+
+ Loading... +
+
+ } + @else if (agents.length > 0) { @for (agent of agents; track trackByFn($index, agent)) {
@@ -96,35 +104,35 @@ ${{ agent.cost | number: '1.2-2' }}
- } -
- - } @else { -
-
There are no agents!
- + + }} @else { +
+
There are no agents!
-
- } + + +
+ } +
}
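
The agent-list template above now renders three distinct states: `agents === null` while the list is still loading, an empty array for "There are no agents!", and the populated table otherwise. Below is a minimal sketch of the service-side pattern that distinction relies on, assuming the subject is typed as `AgentContext[] | null` (null meaning "not yet loaded"). The class name, import path and simplified error handling are illustrative only; the actual `AgentService` changes follow in the next file diff. Keeping null and `[]` distinct avoids flashing the empty-state message before the first response arrives.

```typescript
import { HttpClient } from '@angular/common/http';
import { Injectable } from '@angular/core';
import { BehaviorSubject, EMPTY } from 'rxjs';
import { catchError, map, tap } from 'rxjs/operators';
import { AgentContext } from '../agent.types';

// Sketch only: null = "not loaded yet" (template shows the loading row),
// []   = "loaded but empty" (template shows "There are no agents!").
@Injectable({ providedIn: 'root' })
export class AgentListStateSketch {
    // Assumed typing; the real service may differ.
    private readonly _agents$ = new BehaviorSubject<AgentContext[] | null>(null);
    readonly agents$ = this._agents$.asObservable();

    constructor(private http: HttpClient) {}

    load(): void {
        this.http.get<{ data: AgentContext[] }>('/api/agent/v1/list').pipe(
            // Normalise to an array so the template can safely check .length
            map((response) => response.data ?? []),
            tap((agents) => this._agents$.next(agents)),
            catchError((error) => {
                console.error('Error fetching agents', error);
                return EMPTY; // leave the subject untouched so the UI keeps its last state
            }),
        ).subscribe();
    }
}
```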
diff --git a/frontend/src/app/modules/agents/services/agent.service.ts b/frontend/src/app/modules/agents/services/agent.service.ts index 3221030b..cf33d038 100644 --- a/frontend/src/app/modules/agents/services/agent.service.ts +++ b/frontend/src/app/modules/agents/services/agent.service.ts @@ -1,87 +1,175 @@ import { HttpClient } from '@angular/common/http'; import { Injectable } from '@angular/core'; -import {BehaviorSubject, map, Observable, tap} from 'rxjs'; +import { + BehaviorSubject, + Observable, + throwError, +} from 'rxjs'; +import { + catchError, + map, + tap, +} from 'rxjs/operators'; import { AgentContext, AgentPagination, LlmCall } from '../agent.types'; @Injectable({ providedIn: 'root' }) export class AgentService { - private _agents: BehaviorSubject = new BehaviorSubject(null); - - private _pagination: BehaviorSubject = - new BehaviorSubject({ - length: 0, - size: 0, - endIndex: 0, - page: 0, - lastPage: 0, - startIndex: 0 - }); - - constructor(private _httpClient: HttpClient) {} - - get agents$(): Observable { - return this._agents.asObservable(); - } + /** Holds the list of agents */ + private _agents$: BehaviorSubject = new BehaviorSubject(null); - get pagination$(): Observable { - return this._pagination.asObservable(); - } + /** Exposes the agents as an observable */ + public agents$ = this._agents$.asObservable(); - getAgents(): Observable { - return this._httpClient.get(`/api/agent/v1/list`).pipe( - tap((agents) => { - agents = (agents as any).data; - this._agents.next(agents); - }) - ); - } + private _pagination: BehaviorSubject = + new BehaviorSubject({ + length: 0, + size: 0, + endIndex: 0, + page: 0, + lastPage: 0, + startIndex: 0 + }); - getAgentDetails(agentId: string): Observable { - return this._httpClient.get(`/api/agent/v1/details/${agentId}`); - } + constructor(private _httpClient: HttpClient) { + // Load initial data + this.loadAgents(); + } - getLlmCalls(agentId: string): Observable { - return this._httpClient.get(`/api/llms/calls/agent/${agentId}`).pipe( - map((llmCalls) => { - llmCalls = (llmCalls as any).data; - return llmCalls; - }) - ); - } + get pagination$(): Observable { + return this._pagination.asObservable(); + } - submitFeedback(agentId: string, executionId: string, feedback: string): Observable { - return this._httpClient.post(`/api/agent/v1/feedback`, { agentId, executionId, feedback }); - } + /** Loads agents from the server and updates the BehaviorSubject */ + private loadAgents(): void { + this._httpClient.get<{ data: AgentContext[] }>('/api/agent/v1/list').pipe( + map(response => response.data || []), + tap(agents => this._agents$.next(agents)), + catchError(error => { + console.error('Error fetching agents', error); + return throwError(error); + }) + ).subscribe(); + } - resumeAgent(agentId: string, executionId: string, feedback: string): Observable { - return this._httpClient.post(`/api/agent/v1/resume-hil`, { agentId, executionId, feedback }); - } + /** Retrieves the current list of agents */ + getAgents(): Observable { + return this.agents$; + } - cancelAgent(agentId: string, executionId: string, reason: string): Observable { - return this._httpClient.post(`/api/agent/v1/cancel`, { agentId, executionId, reason }); - } + /** + * Refreshes the agents data from the server + */ + refreshAgents(): void { + this.loadAgents(); + } - updateAgentFunctions(agentId: string, functions: string[]): Observable { - return this._httpClient.post(`/api/agent/v1/update-functions`, { agentId, functions }); - } + /** Get agent details */ + 
getAgentDetails(agentId: string): Observable { + return this._httpClient.get(`/api/agent/v1/details/${agentId}`).pipe( + catchError(error => this.handleError('Load agent', error)) + ); + } - deleteAgents(agentIds: string[]): Observable { - return this._httpClient.post(`/api/agent/v1/delete`, { agentIds }); - } + /** Get LLM calls */ + getLlmCalls(agentId: string): Observable { + return this._httpClient.get<{ data: LlmCall[] }>(`/api/llms/calls/agent/${agentId}`).pipe( + map(response => response.data || []) + ); + } - resumeError(agentId: string, executionId: string, feedback: string): Observable { - return this._httpClient.post(`/api/agent/v1/resume-error`, { - agentId, - executionId, - feedback - }); + /** Updates the local cache when an agent is modified */ + private updateAgentInCache(updatedAgent: AgentContext): void { + const agents = this._agents$.getValue(); + const index = agents.findIndex(agent => agent.agentId === updatedAgent.agentId); + if (index !== -1) { + const updatedAgents = [...agents]; + updatedAgents[index] = updatedAgent; + this._agents$.next(updatedAgents); + } else { + // Optionally handle the case where the agent isn't found + // For example, add the new agent to the list + this._agents$.next([...agents, updatedAgent]); } + } - resumeCompletedAgent(agentId: string, executionId: string, instructions: string): Observable { - return this._httpClient.post(`/api/agent/v1/resume-completed`, { - agentId, - executionId, - instructions - }); - } + /** Removes agents from the local cache */ + private removeAgentsFromCache(agentIds: string[]): void { + const agents = this._agents$.getValue(); + const updatedAgents = agents.filter(agent => !agentIds.includes(agent.agentId)); + this._agents$.next(updatedAgents); + } + + /** Handles errors and logs them */ + private handleError(operation: string, error: any): Observable { + console.error(`Error during ${operation}`, error); + return throwError(error); + } + + /** Submits feedback and updates the local cache */ + submitFeedback(agentId: string, executionId: string, feedback: string): Observable { + return this._httpClient.post(`/api/agent/v1/feedback`, { agentId, executionId, feedback } + ).pipe( + tap(updatedAgent => this.updateAgentInCache(updatedAgent)), + catchError(error => this.handleError('submitFeedback', error)) + ); + } + + /** Resumes an agent and updates the local cache */ + resumeAgent(agentId: string, executionId: string, feedback: string): Observable { + return this._httpClient.post(`/api/agent/v1/resume-hil`, { agentId, executionId, feedback } + ).pipe( + tap(updatedAgent => this.updateAgentInCache(updatedAgent)), + catchError(error => this.handleError('resumeAgent', error)) + ); + } + + /** Cancels an agent and updates the local cache */ + cancelAgent(agentId: string, executionId: string, reason: string): Observable { + return this._httpClient.post(`/api/agent/v1/cancel`, { agentId, executionId, reason } + ).pipe( + tap(updatedAgent => this.updateAgentInCache(updatedAgent)), + catchError(error => this.handleError('cancelAgent', error)) + ); + } + + /** Updates agent functions and updates the local cache */ + updateAgentFunctions(agentId: string, functions: string[]): Observable { + return this._httpClient.post( + `/api/agent/v1/update-functions`, + { agentId, functions } + ).pipe( + tap(updatedAgent => this.updateAgentInCache(updatedAgent)), + catchError(error => this.handleError('updateAgentFunctions', error)) + ); + } + + /** Deletes agents and updates the local cache */ + deleteAgents(agentIds: string[]): 
Observable { + return this._httpClient.post(`/api/agent/v1/delete`, { agentIds }).pipe( + tap(() => this.removeAgentsFromCache(agentIds)), + catchError(error => this.handleError('deleteAgents', error)) + ); + } + + /** Resumes an agent from error and updates the local cache */ + resumeError(agentId: string, executionId: string, feedback: string): Observable { + return this._httpClient.post( + `/api/agent/v1/resume-error`, + { agentId, executionId, feedback } + ).pipe( + tap(updatedAgent => this.updateAgentInCache(updatedAgent)), + catchError(error => this.handleError('resumeError', error)) + ); + } + + /** Resumes a completed agent and updates the local cache */ + resumeCompletedAgent(agentId: string, executionId: string, instructions: string): Observable { + return this._httpClient.post( + `/api/agent/v1/resume-completed`, + { agentId, executionId, instructions } + ).pipe( + tap(updatedAgent => this.updateAgentInCache(updatedAgent)), + catchError(error => this.handleError('resumeCompletedAgent', error)) + ); + } } diff --git a/frontend/src/app/modules/chat/chat-info/chat-info.component.spec.ts b/frontend/src/app/modules/chat/chat-info/chat-info.component.spec.ts index 29ab50fc..cfc56a40 100644 --- a/frontend/src/app/modules/chat/chat-info/chat-info.component.spec.ts +++ b/frontend/src/app/modules/chat/chat-info/chat-info.component.spec.ts @@ -62,16 +62,16 @@ describe('ChatInfoComponent', () => { const sliders = await loader.getAllHarnesses(MatSliderHarness); // Verify initial values match mock user settings - expect(await sliders[0].getValue()).toBe(0.7); // temperature - expect(await sliders[1].getValue()).toBe(0.9); // topP - expect(await sliders[2].getValue()).toBe(0.5); // presencePenalty - expect(await sliders[3].getValue()).toBe(0.5); // frequencyPenalty + expect(await (await sliders[0].getEndThumb()).getValue()).toBe(0.7); // temperature + expect(await (await sliders[1].getEndThumb()).getValue()).toBe(0.9); // topP + expect(await (await sliders[2].getEndThumb()).getValue()).toBe(0.5); // presencePenalty + expect(await (await sliders[3].getEndThumb()).getValue()).toBe(0.5); // frequencyPenalty }); it('should update settings when sliders change', async () => { // Get temperature slider and change its value const temperatureSlider = await loader.getHarness(MatSliderHarness); - await temperatureSlider.setValue(1.5); + await (await temperatureSlider.getEndThumb()).setValue(1.5); // Verify component state was updated expect(component.settings.temperature).toBe(1.5); diff --git a/frontend/src/app/modules/chat/chat.service.ts b/frontend/src/app/modules/chat/chat.service.ts index 1264338a..8afcba98 100644 --- a/frontend/src/app/modules/chat/chat.service.ts +++ b/frontend/src/app/modules/chat/chat.service.ts @@ -19,12 +19,14 @@ import {GenerateOptions} from "app/core/user/user.types"; export class ChatService { private _chat: BehaviorSubject = new BehaviorSubject(null); private _chats: BehaviorSubject = new BehaviorSubject(null); + /** Flag indicating whether chats have been loaded from the server */ + private _chatsLoaded: boolean = false; /** * Constructor */ constructor(private _httpClient: HttpClient) { - this.getChats(); + // Chats will be loaded on-demand via getChats() } private base64ToBlob(base64: string, mimeType: string): Blob { @@ -71,13 +73,26 @@ export class ChatService { // ----------------------------------------------------------------------------------------------------- /** - * Get chats + * Get chats - returns cached data if available, otherwise fetches from server + * 
@returns Observable of Chat array */ getChats(): Observable { + // Return cached chats if already loaded + if (this._chatsLoaded && this._chats.value) { + return of(this._chats.value); + } + + // Otherwise fetch from server return this._httpClient.get('/api/chats').pipe( tap((response: Chat[]) => { - response = (response as any).data.chats + response = (response as any).data.chats; this._chats.next(response); + this._chatsLoaded = true; + }), + catchError((error) => { + // Reset loaded flag on error to prevent caching failed state + this._chatsLoaded = false; + return throwError(() => error); }) ); } @@ -109,7 +124,7 @@ export class ChatService { tap(() => { const currentChats = this._chats.value || []; this._chats.next(currentChats.filter(chat => chat.id !== chatId)); - if (this._chat.getValue().id === chatId) { + if (this._chat.getValue()?.id === chatId) { this._chat.next(null); } }) diff --git a/frontend/src/app/modules/chat/chats/chats.component.ts b/frontend/src/app/modules/chat/chats/chats.component.ts index 941dc497..1821ea48 100644 --- a/frontend/src/app/modules/chat/chats/chats.component.ts +++ b/frontend/src/app/modules/chat/chats/chats.component.ts @@ -18,6 +18,7 @@ import { ChatService } from 'app/modules/chat/chat.service'; import { Chat } from 'app/modules/chat/chat.types'; import { Subject, takeUntil } from 'rxjs'; import { FuseConfirmationService } from '@fuse/services/confirmation'; +import {MatSnackBar} from "@angular/material/snack-bar"; @Component({ selector: 'chat-chats', @@ -49,6 +50,7 @@ export class ChatsComponent implements OnInit, OnDestroy { */ constructor( private _chatService: ChatService, + private snackBar: MatSnackBar, private _changeDetectorRef: ChangeDetectorRef, private confirmationService: FuseConfirmationService, ) {} @@ -61,7 +63,17 @@ export class ChatsComponent implements OnInit, OnDestroy { * On init */ ngOnInit(): void { - // Chats + // Load chats if not already loaded + this._chatService.getChats() + .pipe(takeUntil(this._unsubscribeAll)) + .subscribe({ + error: (error) => { + this.snackBar.open('Error loading chats') + console.error('Failed to load chats:', error); + } + }); + + // Subscribe to chats updates this._chatService.chats$ .pipe(takeUntil(this._unsubscribeAll)) .subscribe((chats: Chat[]) => { diff --git a/frontend/src/app/modules/chat/conversation/conversation.component.ts b/frontend/src/app/modules/chat/conversation/conversation.component.ts index cff73914..89db461c 100644 --- a/frontend/src/app/modules/chat/conversation/conversation.component.ts +++ b/frontend/src/app/modules/chat/conversation/conversation.component.ts @@ -495,6 +495,15 @@ export class ConversationComponent implements OnInit, OnDestroy, AfterViewInit { this.llmSelect.open(); this.llmSelect.focus(); } + if (event.key === 'a' && event.ctrlKey) { + this.fileInput.nativeElement.click(); + } + if (event.key === 'e' && event.ctrlKey) { + this.toggleSendOnEnter(); + } + if (event.key === 'i' && event.ctrlKey) { + this.drawerOpened = !this.drawerOpened + } } toggleSendOnEnter(): void { diff --git a/frontend/src/app/modules/profile/account/account.component.html b/frontend/src/app/modules/profile/account/account.component.html index 2a4f413b..ce85781c 100644 --- a/frontend/src/app/modules/profile/account/account.component.html +++ b/frontend/src/app/modules/profile/account/account.component.html @@ -134,6 +134,13 @@ + +
+ + Nebius + + +
diff --git a/frontend/src/app/modules/profile/account/account.component.ts b/frontend/src/app/modules/profile/account/account.component.ts index 1b0dda2b..cbac762c 100644 --- a/frontend/src/app/modules/profile/account/account.component.ts +++ b/frontend/src/app/modules/profile/account/account.component.ts @@ -75,6 +75,7 @@ export class SettingsAccountComponent implements OnInit { deepinfraKey: new FormControl(''), cerebrasKey: new FormControl(''), xaiKey: new FormControl(''), + nebiusKey: new FormControl(''), }), chat: new FormGroup({ defaultLLM: new FormControl(''), diff --git a/frontend/src/app/modules/actions/actions.component.html b/frontend/src/app/modules/workflows/workflows.component.html similarity index 90% rename from frontend/src/app/modules/actions/actions.component.html rename to frontend/src/app/modules/workflows/workflows.component.html index 576c77e9..33e49c1a 100644 --- a/frontend/src/app/modules/actions/actions.component.html +++ b/frontend/src/app/modules/workflows/workflows.component.html @@ -3,7 +3,7 @@
-
Actions
+
Workflows
@@ -20,8 +20,8 @@ - Operation Type - + Workflow Type + Code Edit Workflow Codebase Query Select Files To Edit diff --git a/frontend/src/app/modules/actions/actions.component.scss b/frontend/src/app/modules/workflows/workflows.component.scss similarity index 100% rename from frontend/src/app/modules/actions/actions.component.scss rename to frontend/src/app/modules/workflows/workflows.component.scss diff --git a/frontend/src/app/modules/actions/actions.component.spec.ts b/frontend/src/app/modules/workflows/workflows.component.spec.ts similarity index 100% rename from frontend/src/app/modules/actions/actions.component.spec.ts rename to frontend/src/app/modules/workflows/workflows.component.spec.ts diff --git a/frontend/src/app/modules/actions/actions.component.ts b/frontend/src/app/modules/workflows/workflows.component.ts similarity index 82% rename from frontend/src/app/modules/actions/actions.component.ts rename to frontend/src/app/modules/workflows/workflows.component.ts index 5e39a805..ec1169be 100644 --- a/frontend/src/app/modules/actions/actions.component.ts +++ b/frontend/src/app/modules/workflows/workflows.component.ts @@ -8,14 +8,14 @@ import { MatSelectModule } from "@angular/material/select"; import { MatCardModule } from "@angular/material/card"; import { MatProgressBarModule } from "@angular/material/progress-bar"; import { MatInputModule } from '@angular/material/input'; -import { ActionsService } from "./actions.service"; +import { WorkflowsService } from "./workflows.service"; import {MatIconModule} from "@angular/material/icon"; import {MatButtonModule} from "@angular/material/button"; @Component({ selector: 'app-code', - templateUrl: './actions.component.html', - styleUrls: ['./actions.component.scss'], + templateUrl: './workflows.component.html', + styleUrls: ['./workflows.component.scss'], standalone: true, imports: [ CommonModule, @@ -29,13 +29,13 @@ import {MatButtonModule} from "@angular/material/button"; MatButtonModule, ] }) -export class ActionsComponent implements OnInit { +export class WorkflowsComponent implements OnInit { codeForm!: FormGroup; result: string = ''; isLoading = false; repositories: string[] = []; - constructor(private fb: FormBuilder, private actionsService: ActionsService) {} + constructor(private fb: FormBuilder, private workflowsService: WorkflowsService) {} ngOnInit() { this.codeForm = this.fb.group({ @@ -44,7 +44,7 @@ export class ActionsComponent implements OnInit { input: ['', Validators.required], }); - this.actionsService.getRepositories().subscribe({ + this.workflowsService.getRepositories().subscribe({ next: (repos: string[]) => { this.repositories = repos; if (repos.length > 0) { @@ -59,8 +59,8 @@ export class ActionsComponent implements OnInit { } getInputLabel(): string { - const operationType = this.codeForm.get('operationType')?.value; - switch (operationType) { + const workflowType = this.codeForm.get('workflowType')?.value; + switch (workflowType) { case 'code': return 'Requirements'; case 'query': @@ -92,13 +92,13 @@ export class ActionsComponent implements OnInit { switch (operationType) { case 'code': - operation = this.actionsService.runCodeEditWorkflow(workingDirectory, input); + operation = this.workflowsService.runCodeEditWorkflow(workingDirectory, input); break; case 'query': - operation = this.actionsService.runCodebaseQuery(workingDirectory, input); + operation = this.workflowsService.runCodebaseQuery(workingDirectory, input); break; case 'selectFiles': - operation = 
this.actionsService.selectFilesToEdit(workingDirectory, input); + operation = this.workflowsService.selectFilesToEdit(workingDirectory, input); break; default: this.result = 'Error: Invalid operation type'; diff --git a/frontend/src/app/modules/actions/actions.routes.ts b/frontend/src/app/modules/workflows/workflows.routes.ts similarity index 51% rename from frontend/src/app/modules/actions/actions.routes.ts rename to frontend/src/app/modules/workflows/workflows.routes.ts index 141641ca..f4b8e501 100644 --- a/frontend/src/app/modules/actions/actions.routes.ts +++ b/frontend/src/app/modules/workflows/workflows.routes.ts @@ -1,9 +1,9 @@ import { Routes } from '@angular/router'; -import {ActionsComponent} from "./actions.component"; +import {WorkflowsComponent} from "./workflows.component"; export default [ { path: '', - component: ActionsComponent, + component: WorkflowsComponent, }, ] as Routes; diff --git a/frontend/src/app/modules/actions/actions.service.ts b/frontend/src/app/modules/workflows/workflows.service.ts similarity index 61% rename from frontend/src/app/modules/actions/actions.service.ts rename to frontend/src/app/modules/workflows/workflows.service.ts index f64d8dc6..4b7262fb 100644 --- a/frontend/src/app/modules/actions/actions.service.ts +++ b/frontend/src/app/modules/workflows/workflows.service.ts @@ -8,22 +8,22 @@ import {environment} from "../../../environments/environment"; @Injectable({ providedIn: 'root', }) -export class ActionsService { +export class WorkflowsService { constructor(private http: HttpClient) {} runCodeEditWorkflow(workingDirectory: string, requirements: string): Observable { - return this.http.post(`/api/code/edit`, { workingDirectory, requirements }); + return this.http.post(`/api/workflows/edit`, { workingDirectory, requirements }); } runCodebaseQuery(workingDirectory: string, query: string): Observable<{ response: string }> { - return this.http.post<{ response: string }>(`/api/code/query`, { workingDirectory, query }); + return this.http.post<{ response: string }>(`/api/workflows/query`, { workingDirectory, query }); } selectFilesToEdit(workingDirectory: string, requirements: string): Observable { - return this.http.post(`/api/code/select-files`, { workingDirectory, requirements }); + return this.http.post(`/api/workflows/select-files`, { workingDirectory, requirements }); } getRepositories(): Observable { - return this.http.get(`/api/code/repositories`); + return this.http.get(`/api/workflows/repositories`); } } diff --git a/package-lock.json b/package-lock.json index 273f74f1..ee57e810 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "@trafficguard/sophia", - "version": "0.5.0", + "version": "0.6.0", "lockfileVersion": 2, "requires": true, "packages": { @@ -338,21 +338,6 @@ } } }, - "node_modules/@ai-sdk/google": { - "version": "1.0.12", - "resolved": "https://registry.npmjs.org/@ai-sdk/google/-/google-1.0.12.tgz", - "integrity": "sha512-vZUK8X997tKmycwCa9d26PoGtIyNEILykYb6JscMoA/pfr5Nss8Ox1JtSGn+PRkehpJhclOaLNWV1JQAjp73aA==", - "dependencies": { - "@ai-sdk/provider": "1.0.3", - "@ai-sdk/provider-utils": "2.0.5" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "zod": "^3.0.0" - } - }, "node_modules/@ai-sdk/google-vertex": { "version": "2.0.12", "resolved": "https://registry.npmjs.org/@ai-sdk/google-vertex/-/google-vertex-2.0.12.tgz", @@ -386,6 +371,21 @@ "zod": "^3.0.0" } }, + "node_modules/@ai-sdk/google-vertex/node_modules/@ai-sdk/google": { + "version": "1.0.12", + "resolved": 
"https://registry.npmjs.org/@ai-sdk/google/-/google-1.0.12.tgz", + "integrity": "sha512-vZUK8X997tKmycwCa9d26PoGtIyNEILykYb6JscMoA/pfr5Nss8Ox1JtSGn+PRkehpJhclOaLNWV1JQAjp73aA==", + "dependencies": { + "@ai-sdk/provider": "1.0.3", + "@ai-sdk/provider-utils": "2.0.5" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.0.0" + } + }, "node_modules/@ai-sdk/groq": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@ai-sdk/groq/-/groq-1.1.0.tgz", @@ -26436,15 +26436,6 @@ } } }, - "@ai-sdk/google": { - "version": "1.0.12", - "resolved": "https://registry.npmjs.org/@ai-sdk/google/-/google-1.0.12.tgz", - "integrity": "sha512-vZUK8X997tKmycwCa9d26PoGtIyNEILykYb6JscMoA/pfr5Nss8Ox1JtSGn+PRkehpJhclOaLNWV1JQAjp73aA==", - "requires": { - "@ai-sdk/provider": "1.0.3", - "@ai-sdk/provider-utils": "2.0.5" - } - }, "@ai-sdk/google-vertex": { "version": "2.0.12", "resolved": "https://registry.npmjs.org/@ai-sdk/google-vertex/-/google-vertex-2.0.12.tgz", @@ -26465,6 +26456,15 @@ "@ai-sdk/provider": "1.0.3", "@ai-sdk/provider-utils": "2.0.5" } + }, + "@ai-sdk/google": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/@ai-sdk/google/-/google-1.0.12.tgz", + "integrity": "sha512-vZUK8X997tKmycwCa9d26PoGtIyNEILykYb6JscMoA/pfr5Nss8Ox1JtSGn+PRkehpJhclOaLNWV1JQAjp73aA==", + "requires": { + "@ai-sdk/provider": "1.0.3", + "@ai-sdk/provider-utils": "2.0.5" + } } } }, diff --git a/package.json b/package.json index b305e07a..d704bdfa 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@trafficguard/sophia", - "version": "0.5.0", + "version": "0.6.0", "description": "AI agent & LLM app platform", "private": true, "type": "commonjs", @@ -13,7 +13,7 @@ "gen": " node --env-file=variables/local.env -r ts-node/register src/cli/gen.ts", "agent": " node --env-file=variables/local.env -r ts-node/register src/cli/agent.ts", "blueberry": "node --env-file=variables/local.env -r ts-node/register src/cli/blueberry.ts", - "docs": " node --env-file=variables/local.env -r ts-node/register src/cli/docs.ts", + "index": " node --env-file=variables/local.env -r ts-node/register src/cli/index.ts", "easy": " node --env-file=variables/local.env -r ts-node/register src/cli/easy.ts", "gaia": " node --env-file=variables/local.env -r ts-node/register src/cli/gaia.ts", "py": " node --env-file=variables/local.env -r ts-node/register src/cli/py.ts", diff --git a/src/cli/docs.ts b/src/cli/index.ts similarity index 92% rename from src/cli/docs.ts rename to src/cli/index.ts index 51c1182f..22f98be2 100644 --- a/src/cli/docs.ts +++ b/src/cli/index.ts @@ -5,9 +5,9 @@ import { RunAgentConfig } from '#agent/agentRunner'; import { runAgentWorkflow } from '#agent/agentWorkflowRunner'; import { shutdownTrace } from '#fastify/trace-init/trace-init'; import { defaultLLMs } from '#llm/services/defaultLlms'; +import { buildIndexDocs } from '#swe/index/repoIndexDocBuilder'; +import { generateRepositoryMaps } from '#swe/index/repositoryMap'; import { detectProjectInfo } from '#swe/projectDetection'; -import { buildIndexDocs } from '#swe/repoIndexDocBuilder'; -import { generateRepositoryMaps } from '#swe/repositoryMap'; import { initApplicationContext } from '../applicationContext'; import { parseProcessArgs, saveAgentId } from './cli'; diff --git a/src/cli/query.ts b/src/cli/query.ts index 739c1afc..4e126d82 100644 --- a/src/cli/query.ts +++ b/src/cli/query.ts @@ -1,12 +1,13 @@ import '#fastify/trace-init/trace-init'; // leave an empty line next so this doesn't get sorted from the first line 
+import { writeFileSync } from 'fs'; import { agentContext, llms } from '#agent/agentContextLocalStorage'; import { AgentLLMs } from '#agent/agentContextTypes'; import { RunAgentConfig } from '#agent/agentRunner'; import { runAgentWorkflow } from '#agent/agentWorkflowRunner'; import { shutdownTrace } from '#fastify/trace-init/trace-init'; import { defaultLLMs } from '#llm/services/defaultLlms'; -import { codebaseQuery } from '#swe/discovery/codebaseQuery'; +import { queryWorkflow } from '#swe/discovery/selectFilesAgent'; import { appContext, initApplicationContext } from '../applicationContext'; import { parseProcessArgs, saveAgentId } from './cli'; @@ -37,8 +38,12 @@ async function main() { )}`; await appContext().agentStateService.save(agent); - const response = await codebaseQuery(initialPrompt); + const response = await queryWorkflow(initialPrompt); + console.log(response); + + writeFileSync('src/cli/gen-out', response); + console.log('Wrote output to src/cli/query-out'); }); if (agentId) { diff --git a/src/fastify/authenticationMiddleware.ts b/src/fastify/authenticationMiddleware.ts index 9830589f..b08355b3 100644 --- a/src/fastify/authenticationMiddleware.ts +++ b/src/fastify/authenticationMiddleware.ts @@ -6,6 +6,8 @@ import { ROUTES } from '../../shared/routes'; import { appContext } from '../applicationContext'; import { getPayloadUserId } from './jwt'; +const WEBHOOKS_BASE_PATH = '/api/webhooks/'; + // Middleware function export function singleUserMiddleware(req: FastifyRequest, _res: any, next: () => void): void { const user = appContext().userService.getSingleUser(); @@ -17,7 +19,7 @@ export function singleUserMiddleware(req: FastifyRequest, _res: any, next: () => export function jwtAuthMiddleware(req: FastifyRequest, reply: FastifyReply, done: () => void): void { // Skip auth for public endpoints - if (req.raw.url.startsWith('/webhooks/') || req.raw.url === DEFAULT_HEALTHCHECK || req.raw.url.startsWith(ROUTES.AUTH_ROUTE_PREFIX)) { + if (req.raw.url.startsWith(WEBHOOKS_BASE_PATH) || req.raw.url === DEFAULT_HEALTHCHECK || req.raw.url.startsWith(ROUTES.AUTH_ROUTE_PREFIX)) { done(); return; } @@ -51,12 +53,12 @@ export function jwtAuthMiddleware(req: FastifyRequest, reply: FastifyReply, done export function googleIapMiddleware(req: FastifyRequest, reply: FastifyReply, next: () => void) { // It would be nicer if the health-check was earlier in the chain. Maybe when nextauthjs integration is done. 
- if (req.raw.url.startsWith('/webhooks/') || req.raw.url === DEFAULT_HEALTHCHECK) { + if (req.raw.url.startsWith(WEBHOOKS_BASE_PATH) || req.raw.url === DEFAULT_HEALTHCHECK) { next(); return; } let email = req.headers['x-goog-authenticated-user-email']; - if (!email) throw new Error('x-goog-authenticated-user-email header not found'); + if (!email) throw new Error(`x-goog-authenticated-user-email header not found requesting ${req.raw.url}`); if (Array.isArray(email)) email = email[0]; // TODO validate the JWT https://cloud.google.com/iap/docs/signed-headers-howto#securing_iap_headers diff --git a/src/functions/scm/gitlab.ts b/src/functions/scm/gitlab.ts index af20a69d..e5a0d11e 100644 --- a/src/functions/scm/gitlab.ts +++ b/src/functions/scm/gitlab.ts @@ -40,13 +40,15 @@ export interface GitLabConfig { /** * AI review of a git diff */ -type DiffReview = { +interface DiffReview { mrDiff: MergeRequestDiffSchema; /** The code being reviewed from the diff */ code: string; /** Code review comments */ comments: Array<{ comment: string; lineNumber: number }>; -}; + /** The code review configuration */ + reviewConfig: CodeReviewConfig; +} // Note that the type returned from getProjects is mapped to GitProject export type GitLabProject = Pick< @@ -79,6 +81,7 @@ export class GitLab implements SourceControlManagement { private config(): GitLabConfig { if (!this._config) { const config = functionConfig(GitLab); + if (!config.token && !envVar('GITLAB_TOKEN')) logger.error('No GitLab token configured on the user or environment'); this._config = { host: config.host || envVar('GITLAB_HOST'), token: config.token || envVar('GITLAB_TOKEN'), @@ -89,12 +92,10 @@ export class GitLab implements SourceControlManagement { } private api(): any { - if (!this._gitlab) { - this._gitlab = new GitlabApi({ - host: `https://${this.config().host}`, - token: this.config().token, - }); - } + this._gitlab ??= new GitlabApi({ + host: `https://${this.config().host}`, + token: this.config().token, + }); return this._gitlab; } @@ -304,13 +305,13 @@ export class GitLab implements SourceControlManagement { if (!codeReview.enabled) continue; if (codeReview.projectPaths.length && !micromatch.isMatch(projectPath, codeReview.projectPaths)) { - logger.info(`Project path globs ${codeReview.projectPaths} dont match ${projectPath}`); + logger.debug(`Project path globs ${codeReview.projectPaths} dont match ${projectPath}`); continue; } const hasMatchingExtension = codeReview.fileExtensions?.include.some((extension) => diff.new_path.endsWith(extension)); const hasRequiredText = codeReview.requires?.text.some((text) => diff.diff.includes(text)); - logger.info(`hasMatchingExtension: ${hasMatchingExtension}. hasRequiredText: ${hasRequiredText}`); + if (hasMatchingExtension && hasRequiredText) { codeReviews.push(this.reviewDiff(diff, codeReview)); } @@ -326,7 +327,7 @@ export class GitLab implements SourceControlManagement { for (const diffReview of diffReviews) { for (const comment of diffReview.comments) { - logger.debug(comment, 'Review comment'); + logger.debug(comment, `Adding review comment to ${diffReview.mrDiff.new_path} for "${diffReview.reviewConfig.title}" [comment, lineNumber]`); const position: MergeRequestDiscussionNotePositionOptions = { baseSha: mergeRequest.diff_refs.base_sha, headSha: mergeRequest.diff_refs.head_sha, @@ -387,7 +388,7 @@ ${currentCode} Instructions: 1. Based on the provided code review guidelines, analyze the code changes from a diff and identify any potential violations. 2. 
Consider the overall context and purpose of the code when identifying violations. -3. Comments with a number at the start of lines indicate line numbers. Use these numbers to help determine the starting lineNumber for the review comment. +3. Comments with a number at the start of lines indicate line numbers. Use these numbers to help determine the starting lineNumber for the review comment. The comment should be on the line after the offending code. 4. Provide the review comments in the following JSON format. If no review violations are found return an empty array for violations. { @@ -405,7 +406,7 @@ Response only in JSON format. Do not wrap the JSON in any tags. violations: Array<{ lineNumber: number; comment: string }>; }; - return { code: currentCode, comments: reviewComments.violations, mrDiff }; + return { code: currentCode, comments: reviewComments.violations, mrDiff, reviewConfig: codeReview }; } @func() diff --git a/src/functions/storage/filesystem.test.ts b/src/functions/storage/fileSystemService.test.ts similarity index 100% rename from src/functions/storage/filesystem.test.ts rename to src/functions/storage/fileSystemService.test.ts diff --git a/src/functions/storage/fileSystemService.ts b/src/functions/storage/fileSystemService.ts index 21b02def..c16fe1d6 100644 --- a/src/functions/storage/fileSystemService.ts +++ b/src/functions/storage/fileSystemService.ts @@ -1,6 +1,6 @@ -import { access, existsSync, lstat, mkdir, readFile, readdir, stat, writeFileSync } from 'node:fs'; +import { access, existsSync, lstat, mkdir, readFile, readFileSync, readdir, readdirSync, stat, writeFileSync } from 'node:fs'; import { resolve } from 'node:path'; -import path, { join } from 'path'; +import path, { join, relative } from 'path'; import { promisify } from 'util'; // import { glob } from 'glob-gitignore'; import ignore, { Ignore } from 'ignore'; @@ -12,7 +12,7 @@ import { VersionControlSystem } from '#functions/scm/versionControlSystem'; import { LlmTools } from '#functions/util'; import { logger } from '#o11y/logger'; import { getActiveSpan, span } from '#o11y/trace'; -import { execCmd, spawnCommand } from '#utils/exec'; +import { execCmd, execCmdSync, spawnCommand } from '#utils/exec'; import { CDATA_END, CDATA_START, needsCDATA } from '#utils/xml-utils'; import { SOPHIA_FS } from '../../appVars'; @@ -30,6 +30,10 @@ const fs = { type FileFilter = (filename: string) => boolean; +// Cache paths to Git repositories and .gitignore files +const gitRoots = new Set(); +const gitIgnorePaths = new Set(); + /** * Interface to the file system based for an Agent which maintains the state of the working directory. 
* @@ -57,11 +61,11 @@ export class FileSystemService { constructor(public basePath?: string) { this.basePath ??= process.cwd(); - const args = process.argv; //.slice(2); // Remove the first two elements (node and script path) + const args = process.argv; const fsArg = args.find((arg) => arg.startsWith('--fs=')); const fsEnvVar = process.env[SOPHIA_FS]; if (fsArg) { - const fsPath = fsArg.slice(5); // Extract the value after '-fs=' + const fsPath = fsArg.slice(5); if (existsSync(fsPath)) { this.basePath = fsPath; logger.info(`Setting basePath to ${fsPath}`); @@ -78,10 +82,8 @@ export class FileSystemService { this.workingDirectory = this.basePath; this.log = logger.child({ FileSystem: this.basePath }); - // We will want to re-visit this, the .git folder can be in a parent directory - if (existsSync(path.join(this.basePath, '.git'))) { - this.vcs = new Git(this); - } + + if (this.getVcsRoot()) this.vcs = new Git(this); } toJSON() { @@ -116,20 +118,22 @@ export class FileSystemService { if (dir.startsWith('/')) { if (existsSync(dir)) { this.workingDirectory = dir; - this.log.info(`workingDirectory is now ${this.workingDirectory}`); - return; + } else { + // try it as a relative path + relativeDir = dir.substring(1); } - // try it as a relative path - relativeDir = dir.substring(1); } const relativePath = path.join(this.getWorkingDirectory(), relativeDir); if (existsSync(relativePath)) { this.workingDirectory = relativePath; - this.log.info(`workingDirectory is now ${this.workingDirectory}`); - return; + } else { + throw new Error(`New working directory ${dir} does not exist (current working directory ${this.workingDirectory}`); } - throw new Error(`New working directory ${dir} does not exist (current working directory ${this.workingDirectory}`); + // After setting the working directory, update the vcs (version control system) property + logger.info(`setWorkingDirectory ${this.workingDirectory}`); + const vcsRoot = this.getVcsRoot(); + this.vcs = vcsRoot ? new Git(this) : null; } /** @@ -238,11 +242,12 @@ export class FileSystemService { this.log.debug(`listFilesRecursively cwd: ${this.workingDirectory}`); const startPath = path.isAbsolute(dirPath) ? dirPath : path.join(this.getWorkingDirectory(), dirPath); - // TODO check isnt going higher than this.basePath + // TODO check isn't going higher than this.basePath - const ig = useGitIgnore ? await this.loadGitignoreRules(startPath) : ignore(); + const gitRoot = useGitIgnore ? this.getVcsRoot() : null; + const ig: Ignore = useGitIgnore ? await this.loadGitignoreRules(startPath, gitRoot) : ignore(); - const files: string[] = await this.listFilesRecurse(this.workingDirectory, startPath, ig, useGitIgnore); + const files: string[] = await this.listFilesRecurse(this.workingDirectory, startPath, ig, useGitIgnore, gitRoot); return files.map((file) => path.relative(this.workingDirectory, file)); } @@ -250,12 +255,13 @@ export class FileSystemService { rootPath: string, dirPath: string, parentIg: Ignore, - useGitIgnore = true, + useGitIgnore: boolean, + gitRoot: string | null, filter: (file: string) => boolean = (name) => true, ): Promise { const files: string[] = []; - const ig = useGitIgnore ? await this.loadGitignoreRules(dirPath) : ignore(); + const ig = useGitIgnore ? 
await this.loadGitignoreRules(dirPath, gitRoot) : ignore(); const mergedIg = ignore().add(parentIg).add(ig); const dirents = await fs.readdir(dirPath, { withFileTypes: true }); @@ -263,7 +269,7 @@ export class FileSystemService { const relativePath = path.relative(rootPath, path.join(dirPath, dirent.name)); if (dirent.isDirectory()) { if (!useGitIgnore || (!mergedIg.ignores(relativePath) && !mergedIg.ignores(`${relativePath}/`))) { - files.push(...(await this.listFilesRecurse(rootPath, path.join(dirPath, dirent.name), mergedIg, useGitIgnore, filter))); + files.push(...(await this.listFilesRecurse(rootPath, path.join(dirPath, dirent.name), mergedIg, useGitIgnore, gitRoot, filter))); } } else { if (!useGitIgnore || !mergedIg.ignores(relativePath)) { @@ -427,22 +433,39 @@ export class FileSystemService { await this.writeFile(filePath, updatedContent); } - async loadGitignoreRules(startPath: string): Promise { + async loadGitignoreRules(startPath: string, gitRoot: string | null): Promise { const ig = ignore(); let currentPath = startPath; - while (currentPath.startsWith(this.basePath)) { + // Continue until git root or filesystem root + while (true) { const gitIgnorePath = path.join(currentPath, '.gitignore'); - if (existsSync(gitIgnorePath)) { - const lines = await fs.readFile(gitIgnorePath, 'utf8').then((data) => - data - .split('\n') - .map((line) => line.trim()) - .filter((line) => line.length && !line.startsWith('#')), - ); + const knownGitIgnore = gitIgnorePaths.has(gitIgnorePath); + if (knownGitIgnore || existsSync(gitIgnorePath)) { + const lines = (await fs.readFile(gitIgnorePath, 'utf8')) + .split('\n') + .map((line) => line.trim()) + .filter((line) => line.length && !line.startsWith('#')); ig.add(lines); + + if (!knownGitIgnore) gitIgnorePaths.add(gitIgnorePath); + } + + // Check if we've reached the git root directory + if (gitRoot && currentPath === gitRoot) { + break; + } + + // Determine the parent directory + const parentPath = path.dirname(currentPath); + + // If we've reached the filesystem root, stop + if (parentPath === currentPath) { + break; } - currentPath = path.dirname(currentPath); + + // Move to the parent directory for the next iteration + currentPath = parentPath; } ig.add('.git'); @@ -480,7 +503,9 @@ export class FileSystemService { async getAllFoldersRecursively(dir = './'): Promise { const workingDir = this.getWorkingDirectory(); const startPath = path.join(workingDir, dir); - const ig = await this.loadGitignoreRules(startPath); + + const gitRoot = this.getVcsRoot(); + const ig = await this.loadGitignoreRules(startPath, gitRoot); const folders: string[] = []; @@ -580,19 +605,33 @@ export class FileSystemService { return tree; } - async getGitRoot(): Promise { + /** + * Gets the version control service (Git) repository root folder, if the current working directory is in a Git repo, else null. 
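+ * Illustrative example (hypothetical paths): with workingDirectory '/home/user/repo/src' this returns '/home/user/repo' when that directory is inside a Git repository, and null when it is not; results are cached in the module-level gitRoots set.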
+ */ + getVcsRoot(): string | null { + // First, check if workingDirectory is under any known Git roots + if (gitRoots.has(this.workingDirectory)) return this.workingDirectory; + + for (const gitRoot of gitRoots) { + if (this.workingDirectory.startsWith(gitRoot)) return gitRoot; + } + + // If not found in cache, execute Git command try { - // Use git rev-parse to get the root directory - const result = await execCmd('git rev-parse --show-toplevel'); + // Use execCmdSync to get the Git root directory synchronously + // Need to pass the workingDirectory to avoid recursion with the default workingDirectory arg + const result = execCmdSync('git rev-parse --show-toplevel', this.workingDirectory); - // If command succeeds, return the trimmed stdout (git root path) - if (!result.error) { - return result.stdout.trim(); + if (result.error) { + logger.error(result.error); + return null; } - - // If git command fails, return null - return null; - } catch { + const gitRoot = result.stdout.trim(); + // Store the new Git root in the cache + gitRoots.add(gitRoot); + return gitRoot; + } catch (e) { + logger.error(e, 'Error checking if in a Git repo'); // Any unexpected errors also result in null return null; } diff --git a/src/functions/web/perplexity.ts b/src/functions/web/perplexity.ts index 452931c1..edb96c8c 100644 --- a/src/functions/web/perplexity.ts +++ b/src/functions/web/perplexity.ts @@ -41,7 +41,7 @@ export class Perplexity { // Valid model ids are at https://docs.perplexity.ai/guides/model-cards response = await perplexity.chat.completions.create({ - model: 'llama-3.1-sonar-large-128k-online', + model: 'sonar-pro', max_tokens: 4096, messages: [{ role: 'user', content: researchQuery }], stream: false, diff --git a/src/llm/base-llm.ts b/src/llm/base-llm.ts index 9b00e450..bfb67b6b 100644 --- a/src/llm/base-llm.ts +++ b/src/llm/base-llm.ts @@ -2,7 +2,7 @@ import { StreamTextResult } from 'ai'; import { AgentContext } from '#agent/agentContextTypes'; import { countTokens } from '#llm/tokens'; import { GenerateJsonOptions, GenerateTextOptions, LLM, LlmMessage } from './llm'; -import { extractJsonResult, extractStringResult } from './responseParsers'; +import { extractJsonResult, extractTag } from './responseParsers'; export interface SerializedLLM { service: string; @@ -20,7 +20,7 @@ export abstract class BaseLLM implements LLM { constructor( protected readonly displayName: string, protected readonly service: string, - protected readonly model: string, + protected model: string, protected maxInputTokens: number, readonly calculateInputCost: InputCostFunction, readonly calculateOutputCost: OutputCostFunction, @@ -100,7 +100,7 @@ export abstract class BaseLLM implements LLM { ): Promise { const { messages, options } = this.parseGenerateTextParameters(userOrSystemOrMessages, userOrOpts, opts); const response = await this.generateText(messages, options); - return extractStringResult(response); + return extractTag(response, 'result'); } generateTextWithJson(userPrompt: string, opts?: GenerateTextOptions): Promise; diff --git a/src/llm/llm.ts b/src/llm/llm.ts index dedb0514..92b8fe12 100644 --- a/src/llm/llm.ts +++ b/src/llm/llm.ts @@ -1,10 +1,10 @@ // https://github.com/AgentOps-AI/tokencost/blob/main/tokencost/model_prices.json -import { CoreMessage, FilePart, ImagePart, StreamTextResult, TextPart } from 'ai'; +import { CoreMessage, FilePart, ImagePart, StreamTextResult, TextPart, UserContent } from 'ai'; // Should match fields in CallSettings in node_modules/ai/dist/index.d.ts export interface 
GenerateOptions { /** - * Temperature controls the randomness in token selection. Valid values are between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. Defaults to 1 + * Temperature controls the randomness in token selection. Valid values are between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. * We generally recommend altering this or top_p but not both. */ temperature?: number; @@ -42,6 +42,8 @@ export interface GenerateOptions { Providers may have limits on the number of stop sequences. */ stopSequences?: string[]; + + maxRetries?: number; } export interface GenerateTextOptions extends GenerateOptions { @@ -55,11 +57,6 @@ export interface GenerateTextOptions extends GenerateOptions { */ export type GenerateJsonOptions = Omit; -/** - * Options when generating text expecting function calls - */ -export type GenerateFunctionOptions = Omit; - /* Types from the 'ai' package: @@ -122,6 +119,10 @@ export type LlmMessage = CoreMessage & { time?: number; }; +export function userContentText(userContent: UserContent | any): string { + return typeof userContent === 'string' ? userContent : userContent.find((content) => content.type === 'text')?.text; +} + export function system(text: string, cache = false): LlmMessage { return { role: 'system', @@ -154,7 +155,7 @@ export interface LLM { /** Generates text from a LLM */ generateText(userPrompt: string, opts?: GenerateTextOptions): Promise; generateText(systemPrompt: string, userPrompt: string, opts?: GenerateTextOptions): Promise; - generateText(messages: LlmMessage[], opts?: GenerateTextOptions): Promise; + generateText(messages: LlmMessage[] | ReadonlyArray, opts?: GenerateTextOptions): Promise; /** * Generates a response that ends with a JSON object wrapped in either tags or Markdown triple ticks. @@ -163,19 +164,20 @@ export interface LLM { */ generateTextWithJson(userPrompt: string, opts?: GenerateJsonOptions): Promise; generateTextWithJson(systemPrompt: string, userPrompt: string, opts?: GenerateJsonOptions): Promise; - generateTextWithJson(messages: LlmMessage[], opts?: GenerateJsonOptions): Promise; + generateTextWithJson(messages: LlmMessage[] | ReadonlyArray, opts?: GenerateJsonOptions): Promise; /** Generates a response which only returns a JSON object. */ generateJson(userPrompt: string, opts?: GenerateJsonOptions): Promise; generateJson(systemPrompt: string, userPrompt: string, opts?: GenerateJsonOptions): Promise; - generateJson(messages: LlmMessage[], opts?: GenerateJsonOptions): Promise; + generateJson(messages: LlmMessage[] | ReadonlyArray, opts?: GenerateJsonOptions): Promise; + /** * Generates a response that is expected to have a element, and returns the text inside it. * This useful when you want to LLM to output discovery, reasoning, etc. to improve the answer, and only want the final result returned. 
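* Illustrative usage (assumed prompt wording): const answer = await llm.generateTextWithResult('Reason step by step, then place only the final answer inside <result></result> tags.');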
*/ generateTextWithResult(prompt: string, opts?: GenerateTextOptions): Promise; generateTextWithResult(systemPrompt: string, prompt: string, opts?: GenerateTextOptions): Promise; - generateTextWithResult(messages: LlmMessage[], opts?: GenerateTextOptions): Promise; + generateTextWithResult(messages: LlmMessage[] | ReadonlyArray, opts?: GenerateTextOptions): Promise; /** * Streams text from the LLM @@ -183,7 +185,11 @@ export interface LLM { * @param onChunk streaming chunk callback * @param opts */ - streamText(messages: LlmMessage[], onChunk: ({ string }) => void, opts?: GenerateTextOptions): Promise>; + streamText( + messages: LlmMessage[] | ReadonlyArray, + onChunk: ({ string }) => void, + opts?: GenerateTextOptions, + ): Promise>; /** * The service provider of the LLM (OpenAI, Google, TogetherAI etc) diff --git a/src/llm/llmFactory.ts b/src/llm/llmFactory.ts index f9bac5d4..e746dd18 100644 --- a/src/llm/llmFactory.ts +++ b/src/llm/llmFactory.ts @@ -1,6 +1,9 @@ import { AgentLLMs } from '#agent/agentContextTypes'; import { LLM } from '#llm/llm'; import { blueberryLLMRegistry } from '#llm/multi-agent/blueberry'; +import { CePO_LLMRegistry } from '#llm/multi-agent/cepo'; +import { deepSeekFallbackRegistry } from '#llm/multi-agent/deepSeekR1_Fallbacks'; +import { MoA_reasoningLLMRegistry } from '#llm/multi-agent/reasoning-debate'; import { MultiLLM } from '#llm/multi-llm'; import { anthropicLLMRegistry } from '#llm/services/anthropic'; import { anthropicVertexLLMRegistry } from '#llm/services/anthropic-vertex'; @@ -10,6 +13,7 @@ import { deepseekLLMRegistry } from '#llm/services/deepseek'; import { fireworksLLMRegistry } from '#llm/services/fireworks'; import { groqLLMRegistry } from '#llm/services/groq'; import { mockLLMRegistry } from '#llm/services/mock-llm'; +import { nebiusLLMRegistry } from '#llm/services/nebius'; import { ollamaLLMRegistry } from '#llm/services/ollama'; import { openAiLLMRegistry } from '#llm/services/openai'; import { perplexityLLMRegistry } from '#llm/services/perplexity-llm'; @@ -31,8 +35,12 @@ export const LLM_FACTORY: Record LLM> = { ...cerebrasLLMRegistry(), ...perplexityLLMRegistry(), ...xaiLLMRegistry(), + ...nebiusLLMRegistry(), ...ollamaLLMRegistry(), + ...deepSeekFallbackRegistry(), ...blueberryLLMRegistry(), + ...MoA_reasoningLLMRegistry(), + ...CePO_LLMRegistry(), ...mockLLMRegistry(), }; diff --git a/src/llm/multi-agent/blackberry.ts b/src/llm/multi-agent/blackberry.ts index 8f10a867..5a2a9046 100644 --- a/src/llm/multi-agent/blackberry.ts +++ b/src/llm/multi-agent/blackberry.ts @@ -80,6 +80,10 @@ export class Blackberry extends BaseLLM { ); } + isConfigured(): boolean { + return this.mediator.isConfigured() && this.llms.findIndex((llm) => !llm.isConfigured()) === -1; + } + async _generateText(systemPrompt: string | undefined, userPrompt: string, opts?: GenerateTextOptions): Promise { if (systemPrompt) { logger.error('system prompt not available for Blueberry'); diff --git a/src/llm/multi-agent/blueberry.ts b/src/llm/multi-agent/blueberry.ts index 335ca651..ca7b112d 100644 --- a/src/llm/multi-agent/blueberry.ts +++ b/src/llm/multi-agent/blueberry.ts @@ -15,7 +15,7 @@ import { withActiveSpan } from '#o11y/trace'; export function blueberryLLMRegistry(): Record LLM> { return { - 'blueberry:': () => new Blueberry(), + 'MoA blueberry:': () => new Blueberry(), }; } @@ -88,7 +88,7 @@ export class Blueberry extends BaseLLM { */ constructor(model = 'default') { super( - 'Blueberry', + 'MoA', 'blueberry', model, 200_000, @@ -130,16 +130,16 @@ export class 
Blueberry extends BaseLLM { } logger.info('Initial response...'); const initialResponses = await this.generateInitialResponses(userPrompt, MIND_OVER_DATA_SYS_PROMPT, opts); - const debatedResponses = await this.multiAgentDebate(initialResponses, MIND_OVER_DATA_SYS_PROMPT, opts); + const debatedResponses = await this.multiAgentDebate(userPrompt, initialResponses, MIND_OVER_DATA_SYS_PROMPT, opts); logger.info('Mediating response...'); return this.mergeBestResponses(userPrompt, debatedResponses); } - private async generateInitialResponses(userPrompt: string, systemPrompt?: string, opts?: GenerateTextOptions): Promise { + private async generateInitialResponses(userPrompt: string, systemPrompt: string, opts?: GenerateTextOptions): Promise { return Promise.all(this.llms.map((llm) => llm.generateText(systemPrompt, userPrompt, { ...opts, temperature: 1 }))); } - private async multiAgentDebate(responses: string[], systemPromptSrc?: string, opts?: GenerateTextOptions, rounds = 4): Promise { + private async multiAgentDebate(userPrompt: string, responses: string[], systemPromptSrc: string, opts?: GenerateTextOptions, rounds = 4): Promise { let debatedResponses = responses; for (let round = 1; round < rounds; round++) { @@ -148,7 +148,7 @@ export class Blueberry extends BaseLLM { this.llms.map((llm, index) => { const leftNeighborIndex = (index - 1 + this.llms.length) % this.llms.length; const rightNeighborIndex = (index + 1) % this.llms.length; - const newUserPrompt = `${responses[index]}\n\nBelow are responses from two other agents:\n\n${responses[leftNeighborIndex]}\n\n\n\n${responses[rightNeighborIndex]}\n\n\nUse the insights from all the responses to refine and update your answer in the same format.`; + const newUserPrompt = `\n${userPrompt}\n\n\n${responses[index]}\n\nBelow are responses from two other agents:\n\n${responses[leftNeighborIndex]}\n\n\n\n${responses[rightNeighborIndex]}\n\n\nUse the insights from all the responses to refine and update your answer in the same format.`; return llm.generateText(systemPromptSrc, newUserPrompt, opts); }), ); diff --git a/src/llm/multi-agent/cepo.ts b/src/llm/multi-agent/cepo.ts new file mode 100644 index 00000000..85274228 --- /dev/null +++ b/src/llm/multi-agent/cepo.ts @@ -0,0 +1,347 @@ +import { BaseLLM } from '#llm/base-llm'; +import { GenerateTextOptions, LLM, LlmMessage, assistant, system, user, userContentText } from '#llm/llm'; +import { cerebrasLlama3_3_70b } from '#llm/services/cerebras'; +import { logger } from '#o11y/logger'; +import { withActiveSpan } from '#o11y/trace'; + +interface CePOConfig { + bestofn_n: number; + bestofn_temperature: number; + bestofn_max_tokens: number; + bestofn_rating_type: 'absolute' | 'pairwise'; + planning_n: number; + planning_m: number; + planning_temperature_step1: number; + planning_temperature_step2: number; + planning_temperature_step3: number; + planning_temperature_step4: number; + planning_max_tokens_step1: number; + planning_max_tokens_step2: number; + planning_max_tokens_step3: number; + planning_max_tokens_step4: number; + printOutput: boolean; +} + +const config: CePOConfig = { + bestofn_n: 3, + bestofn_temperature: 0.1, + bestofn_max_tokens: 4096, + bestofn_rating_type: 'absolute', + planning_n: 3, + planning_m: 6, + planning_temperature_step1: 0.55, + planning_temperature_step2: 0.25, + planning_temperature_step3: 0.1, + planning_temperature_step4: 0, + planning_max_tokens_step1: 4096, + planning_max_tokens_step2: 4096, + planning_max_tokens_step3: 4096, + planning_max_tokens_step4: 4096, + 
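// printOutput toggles debug-level logging of the intermediate plans and solutions generated below
+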
printOutput: false, +}; + +// https://github.com/codelion/optillm/blob/main/optillm/cepo/README.md + +export function CePO_LLMRegistry(): Record LLM> { + const registry = {}; + registry[`CePO:${cerebrasLlama3_3_70b().getId()}`] = () => CePO_Cerebras_Llama70b(); + return registry; +} + +export function CePO_Cerebras_Llama70b(): LLM { + return new CePO_LLM(cerebrasLlama3_3_70b, 'CePO (Llama 3.3 70b Cerebras)'); +} + +/** + * The Cerebras Planning and Optimization (CePO) Method + * + * CePO is an inference-time computation method designed to enhance the accuracy of large language models (LLMs) on tasks requiring reasoning and planning, such as solving math or coding problems. It integrates several advanced techniques, including Best of N, Chain of Thought (CoT), Self-Reflection, Self-Improvement, and Prompt Engineering. + * + * If you have any questions or want to contribute, please reach out to us on cerebras.ai/discord + * + * CePO Methodology + * + * In CePO, the Best of N technique is applied to bestofn_n solution candidates. Each solution is generated through the following four steps: + * + * Step 1: Plan Generation The model generates a detailed, step-by-step plan to solve the problem, along with its confidence level for each step. + * + * Step 2: Initial Solution Using the plan from Step 1, the model produces an initial solution. + * + * Steps 1 and 2 are repeated planning_n times to generate multiple solution proposals. If the model exceeds the token budget during Step 1 or 2, the plan/solution is marked as incomplete, rejected, and regenerated. A maximum of planning_m attempts is made to generate planning_n valid proposals. + * + * Step 3: Plan Refinement The model reviews all generated solution proposals and their associated plans, identifying inconsistencies. Based on this analysis, a refined, final step-by-step plan is constructed. + * + * Step 4: Final Solution The model uses the refined plan from Step 3 to produce the final answer. + * @constructor + */ +export class CePO_LLM extends BaseLLM { + llm: LLM; + + /** + * @param llmProvider + * @param name + */ + constructor(llmProvider?: () => LLM, name?: string) { + super( + name ?? '(CePO)', + 'CePO', + llmProvider().getId(), + 128_000, + () => 0, + () => 0, + ); + this.llm = llmProvider(); + } + + getModel(): string { + return this.llm.getId(); + } + + isConfigured(): boolean { + return this.llm.isConfigured(); + } + + protected supportsGenerateTextFromMessages(): boolean { + return true; + } + + protected async generateTextFromMessages(llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + return withActiveSpan(`CePO id:${opts.id}`, async () => { + const completions: string[] = []; + for (let i = 0; i < config.bestofn_n; i++) { + const completion = await this.generateCompletion(llmMessages, opts); + completions.push(completion); + } + + const bestAnswer = await this.rateAnswers(completions, llmMessages, opts); + + return bestAnswer; + }); + } + + private async generatePlan(llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const userMessageContent = userContentText(llmMessages[llmMessages.length - 1].content); + // TODO replace the last message with the planning prompt + const planPrompt = `To answer this question, can you come up with a concise plan to solve it step-by-step but do not provide the final answer. Also, for each step, provide your confidence in the correctness of that step as well as your ability to execute it correctly. 
Here is the question:\n${userMessageContent}`; + const messages: LlmMessage[] = [...llmMessages, user(planPrompt)]; + + try { + const plan = await this.llm.generateText(messages, { + ...opts, + temperature: config.planning_temperature_step1, + }); + if (config.printOutput) { + logger.debug(`Generated plan: ${plan}`); + } + return plan; + } catch (error) { + logger.error(`Error during plan generation: ${error}`); + throw error; + } + } + + private async executePlan(plan: string, llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const executePrompt = + 'Can you execute the above plan step-by-step to produce the final answer. Be extra careful when executing steps where your confidence is lower.'; + const messages: LlmMessage[] = [...llmMessages, assistant(plan), user(executePrompt)]; + + try { + const solution = await this.llm.generateText(messages, { + ...opts, + temperature: config.planning_temperature_step2, + }); + if (config.printOutput) { + logger.debug(`Execution result: ${solution}`); + } + return solution; + } catch (error) { + logger.error(`Error during plan execution: ${error}`); + throw error; + } + } + + private async refinePlan(plans: string[], llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const userMessageContent = userContentText(llmMessages[llmMessages.length - 1].content); + const combinedPlans = plans.map((plan, index) => `Plan ${index + 1}:\n${plan}`).join('\n\n'); + + const refinePrompt = `Can you review the following plans and identify any inconsistencies between them. After that, can you address them and present a final step-by-step solution to the problem? Here is the question:\n${userMessageContent}`; + + const messages: LlmMessage[] = [...llmMessages]; + messages.push(assistant(combinedPlans)); + messages.push(user(refinePrompt)); + + try { + const refinedPlan = await this.llm.generateText(messages, { + ...opts, + temperature: config.planning_temperature_step3, + // maxTokens: config.planning_max_tokens_step3, + }); + logger.debug(`Refined plan: ${refinedPlan}`); + return refinedPlan; + } catch (error) { + logger.error(`Error during plan refinement: ${error}`); + throw error; + } + } + + private async generateFinalAnswer(refinedPlan: string, llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const userMessageContent = userContentText(llmMessages[llmMessages.length - 1].content); + const finalAnswerPrompt = `Use your final solution from above to correctly answer the question. 
Here is the question:\n${userMessageContent}`; + + const messages: LlmMessage[] = [...llmMessages]; + messages.push(assistant(refinedPlan)); + messages.push(user(finalAnswerPrompt)); + + const finalAnswer = await this.llm.generateText(messages, { ...opts, temperature: 0 }); + + return finalAnswer; + } + + private async rateAnswers(answers: string[], llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + if (config.bestofn_rating_type === 'absolute') { + return this.rateAnswersAbsolute(answers, llmMessages, opts); + } + if (config.bestofn_rating_type === 'pairwise') { + return this.rateAnswersPairwise(answers, llmMessages, opts); + } + throw new Error(`Invalid rating type: ${config.bestofn_rating_type}`); + } + + private extractQuestionOnly(task: string): string { + let questionOnly = task.replace('\n## Question: \n\n', ''); + questionOnly = questionOnly.replace(/\n\n\n## Instruction[\s\S]*```json\n{\n {4}"reasoning": "___",\n {4}"answer": "___"\n}\n```/g, ''); + return questionOnly.trim(); + } + + private async generateCompletion(llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const plans: string[] = []; + let attempts = 0; + + // Step 1 and 2: Generate Plans and Execute Plans with retries + while (plans.length < config.planning_n && attempts < config.planning_m) { + attempts++; + try { + const plan = await this.generatePlan(llmMessages, opts); + const solution = await this.executePlan(plan, llmMessages, opts); + plans.push(solution); + } catch (error) { + logger.error(`Plan generation attempt ${attempts} failed: ${error}`); + } + } + + // If no valid plans, proceed with the last attempted plan + if (plans.length === 0) { + logger.warn('No valid plans generated. Proceeding with the last attempted plan.'); + // Attempt to generate one last plan + try { + const lastPlan = await this.generatePlan(llmMessages, opts); + const lastSolution = await this.executePlan(lastPlan, llmMessages, opts); + plans.push(lastSolution); + } catch (error) { + logger.error('Failed to generate a fallback plan.'); + // Re-throw the error if unable to proceed + throw new Error('Failed to generate any valid plans.'); + } + } + + // Step 3: Refine Plan + const refinedPlan = await this.refinePlan(plans, llmMessages, opts); + + // Step 4: Generate Final Answer + const finalAnswer = await this.generateFinalAnswer(refinedPlan, llmMessages, opts); + + return finalAnswer; + } + + private async rateAnswersAbsolute(answers: string[], llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const ratings: number[] = []; + const userMessageContent = userContentText(llmMessages[llmMessages.length - 1].content); + + const ratingPrompt = `Please act as an impartial judge and evaluate the quality of the response provided by an AI assistant to the user question displayed below. Your evaluation should consider correctness as a primary factor as well as other factors such as helpfulness, relevance, accuracy, depth, creativity, and level of detail of the response. + +Evaluation Criteria: +- Correctness: How free is it from errors or mistakes? +- Helpfulness: How effectively does the response meet the user's needs? +- Relevance: How directly does the response address the original question? +- Accuracy: Are the information and explanations factually correct? +- Depth: Does the response provide comprehensive and meaningful insights? +- Creativity: Does the response offer unique or innovative perspectives? +- Clarity: Is the response well-organized, coherent, and easy to understand? 
+ +Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, please rate the response on a scale of 1 to 10 by strictly following this format: "Rating: [[rating]]", for example: "Rating: [[5]]"`; + + for (const answer of answers) { + const messages: LlmMessage[] = [...llmMessages]; + messages.push(assistant(answer)); + messages.push(user(ratingPrompt)); + + const ratingResponse = await this.llm.generateText(messages, { + ...opts, + temperature: config.bestofn_temperature, + //maxTokens: config.bestofn_max_tokens, + }); + const ratingMatch = ratingResponse.match(/Rating: \[\[(\d+)\]\]/); + const rating = ratingMatch ? parseInt(ratingMatch[1], 10) : 0; + ratings.push(rating); + } + + // Select the answer with the highest rating + const bestAnswerIndex = ratings.indexOf(Math.max(...ratings)); + return answers[bestAnswerIndex]; + } + + private async rateAnswersPairwise(answers: string[], llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const ratings: number[] = new Array(answers.length).fill(0); + const pairs = this.generatePairs(answers.length); + + const ratingPrompt = `Please act as an impartial judge and compare the quality of the two responses provided by the AI assistant to the user's question displayed below. Evaluation Criteria: +- Helpfulness: How effectively does the response meet the user's needs? +- Relevance: How directly does the response address the original question? +- Accuracy: Are the information and explanations factually correct? +- Depth: Does the response provide comprehensive and meaningful insights? +- Creativity: Does the response offer unique or innovative perspectives? +- Clarity: Is the response well-organized, coherent, and easy to understand? + +Evaluation Process: +1. Carefully review the user's question and the AI assistant's responses. +2. Compare the responses against each other for each criterion. +3. Provide a concise explanation of your overall evaluation. +4. Select the response that is superior based on the above criteria. + +Reply with "Better Response: [[response id]]". +If the first response is better, reply with "Better Response: [[0]]". +If the second response is better, reply with "Better Response: [[1]]".`; + + for (const [i, j] of pairs) { + const responsesPair = `Response 0: ${answers[i]}\n\nResponse 1: ${answers[j]}`; + const messages: LlmMessage[] = [...llmMessages]; + messages.push(assistant(responsesPair)); + messages.push(user(ratingPrompt)); + + const ratingResponse = await this.llm.generateText(messages, { + ...opts, + temperature: config.bestofn_temperature, + //maxTokens: config.bestofn_max_tokens, + }); + + const match = ratingResponse.match(/Better Response: \[\[(\d+)\]\]/); + if (match) { + const winner = parseInt(match[1], 10); + ratings[winner === 0 ? 
i : j]++; + } + } + + const bestAnswerIndex = ratings.indexOf(Math.max(...ratings)); + return answers[bestAnswerIndex]; + } + + private generatePairs(n: number): [number, number][] { + const pairs: [number, number][] = []; + for (let i = 0; i < n; i++) { + for (let j = i + 1; j < n; j++) { + pairs.push([i, j]); + } + } + return pairs; + } +} diff --git a/src/llm/multi-agent/deepSeekR1_Fallbacks.ts b/src/llm/multi-agent/deepSeekR1_Fallbacks.ts new file mode 100644 index 00000000..7e2d4a20 --- /dev/null +++ b/src/llm/multi-agent/deepSeekR1_Fallbacks.ts @@ -0,0 +1,55 @@ +import { logger } from '#o11y/logger'; +import { BaseLLM } from '../base-llm'; +import { GenerateTextOptions, LLM, LlmMessage } from '../llm'; +import { fireworksDeepSeekR1 } from '../services/fireworks'; + +import { togetherDeepSeekR1 } from '#llm/services/together'; + +export function deepSeekFallbackRegistry(): Record LLM> { + return { + DeepSeekFallback: DeepSeekR1_Together_Fireworks, + }; +} + +export function DeepSeekR1_Together_Fireworks(): LLM { + return new DeepSeekR1_Fallbacks(); +} + +/** + * LLM implementation for DeepSeek R1 which uses Together.ai and Fireworks.ai for more privacy. + * Tries Together.ai first as is slightly cheaper, then falls back to Fireworks + */ +export class DeepSeekR1_Fallbacks extends BaseLLM { + private together: LLM = togetherDeepSeekR1(); + private fireworks: LLM = fireworksDeepSeekR1(); + + constructor() { + super( + 'DeepSeek R1 (Together, Fireworks)', + 'DeepSeekFallback', + 'deepseek-r1-together-fireworks', + 0, // Initialized later + () => 0, + () => 0, + ); + } + + protected supportsGenerateTextFromMessages(): boolean { + return true; + } + + isConfigured(): boolean { + return this.together.isConfigured() && this.fireworks.isConfigured(); + } + + async generateTextFromMessages(messages: LlmMessage[], opts?: GenerateTextOptions): Promise { + try { + return await this.together.generateText(messages, { ...opts, maxRetries: 0 }); + } catch (e) { + const errMsg = e.statuCode === '429' ? 'rate limited' : `error: ${e.message}`; + logger.error(`Together DeepSeek ${errMsg}`); + + return await this.fireworks.generateText(messages, opts); + } + } +} diff --git a/src/llm/multi-agent/fastLlama70b.ts b/src/llm/multi-agent/fastLlama70b.ts new file mode 100644 index 00000000..435ab3a0 --- /dev/null +++ b/src/llm/multi-agent/fastLlama70b.ts @@ -0,0 +1,53 @@ +import { cerebrasLlama3_3_70b } from '#llm/services/cerebras'; +import { logger } from '#o11y/logger'; +import { BaseLLM } from '../base-llm'; +import { GenerateTextOptions, LLM, LlmMessage } from '../llm'; +import { fireworksLlama3_70B } from '../services/fireworks'; +import { groqLlama3_3_70B } from '../services/groq'; + +/** + * LLM implementation for Llama 3.3 70b that prioritizes speed and falls back to other providers. + */ +export class FastestLlama3_70B extends BaseLLM { + private readonly providers: LLM[]; + + constructor() { + super( + 'Llama3.3-70b (Fastest)', + 'multi', + 'fastest-llama3-70b', + 0, // Initialized later + () => 0, + () => 0, + ); + // Define the providers and their priorities. 
Lower number = higher priority + this.providers = [cerebrasLlama3_3_70b(), groqLlama3_3_70B(), fireworksLlama3_70B()]; + + this.maxInputTokens = Math.max(...this.providers.map((p) => p.getMaxInputTokens())); + } + + isConfigured(): boolean { + return this.providers.findIndex((llm) => !llm.isConfigured()) === -1; + } + + protected supportsGenerateTextFromMessages(): boolean { + return true; + } + + async generateTextFromMessages(messages: LlmMessage[], opts?: GenerateTextOptions): Promise { + for (const llm of this.providers) { + const combinedPrompt = messages.map((m) => m.content).join('\n'); + if (combinedPrompt.length > llm.getMaxInputTokens()) { + logger.warn(`Input tokens exceed limit for ${llm.getDisplayName()}. Trying next provider.`); + continue; + } + try { + logger.info(`Trying ${llm.getDisplayName()}`); + return await llm.generateText(messages, opts); + } catch (error) { + logger.error(`Error with ${llm.getDisplayName()}: ${error.message}. Trying next provider.`); + } + } + throw new Error('All Llama 3.3 70b providers failed.'); + } +} diff --git a/src/llm/multi-agent/reasoning-debate.ts b/src/llm/multi-agent/reasoning-debate.ts new file mode 100644 index 00000000..be887075 --- /dev/null +++ b/src/llm/multi-agent/reasoning-debate.ts @@ -0,0 +1,175 @@ +import { BaseLLM } from '#llm/base-llm'; +import { GenerateTextOptions, LLM, LlmMessage, userContentText } from '#llm/llm'; +import { getLLM } from '#llm/llmFactory'; +import { DeepSeekR1_Together_Fireworks } from '#llm/multi-agent/deepSeekR1_Fallbacks'; +import { Claude3_5_Sonnet_Vertex } from '#llm/services/anthropic-vertex'; +import { deepSeekR1, deepSeekV3 } from '#llm/services/deepseek'; +import { fireworksDeepSeek, fireworksDeepSeekR1 } from '#llm/services/fireworks'; +import { openAIo1 } from '#llm/services/openai'; +import { togetherDeepSeekR1 } from '#llm/services/together'; +import { Gemini_2_0_Flash_Thinking } from '#llm/services/vertexai'; +import { logger } from '#o11y/logger'; + +// sparse multi-agent debate https://arxiv.org/abs/2406.11776 + +export function MoA_reasoningLLMRegistry(): Record LLM> { + return { + 'MoA:R1x3': () => new ReasonerDebateLLM('R1x3', deepSeekV3, [deepSeekR1, deepSeekR1, deepSeekR1], 'MoA R1x3'), + 'MoA:R1x3-Together-Fireworks': Together_R1x3_Together_Fireworks, + 'MoA:R1x3-Together': () => Together_R1x3(), + 'MoA:Sonnet_R1,o1,Gemini_Together': TogetherMoA_Claude_Sonnet_R1x2_o1, + 'MoA:Sonnet-Claude-R1,o1,Gemini': () => + new ReasonerDebateLLM( + 'Sonnet-Claude-R1,o1,Gemini', + Claude3_5_Sonnet_Vertex, + [togetherDeepSeekR1, openAIo1, Gemini_2_0_Flash_Thinking], + 'MoA:R1,o1,Gemini', + ), + }; +} + +export function Together_R1x3_Together_Fireworks(): LLM { + return new ReasonerDebateLLM( + 'R1x3-Together-Fireworks', + DeepSeekR1_Together_Fireworks, + [DeepSeekR1_Together_Fireworks, DeepSeekR1_Together_Fireworks, DeepSeekR1_Together_Fireworks], + 'MoA R1x3 (Together, Fireworks)', + ); +} + +export function TogetherMoA_Claude_Sonnet_R1x2_o1(): LLM { + return new ReasonerDebateLLM( + 'Sonnet_R1,o1,Gemini_Together', + Claude3_5_Sonnet_Vertex, + [DeepSeekR1_Together_Fireworks, openAIo1, DeepSeekR1_Together_Fireworks], + 'MoA:Claude-R1,o1,Gemini (Together, Fireworks)', + ); +} + +export function Together_R1x3(): LLM { + return new ReasonerDebateLLM('R1x3-Together', togetherDeepSeekR1, [togetherDeepSeekR1, togetherDeepSeekR1, togetherDeepSeekR1], 'MoA R1x3 Together'); +} + +/** + * Multi-agent debate (spare communication topology) implementation with simple prompts for reasoning LLMs + * 
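In each debate round an agent is shown only its own previous answer and the answers of its two ring neighbours, not every response, which keeps the communication topology sparse.
+ *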
@constructor + */ +export class ReasonerDebateLLM extends BaseLLM { + llms: LLM[]; + mediator: LLM; + + /** + * + * @param modelIds LLM model ids to use seperated by the pipe character. The first id will be used as the mediator. The remaining will be used as the initial response/debate generation. + * @param providedMediator + * @param providedDebateLLMs + * @param name + */ + constructor(modelIds = '', providedMediator?: () => LLM, providedDebateLLMs?: Array<() => LLM>, name?: string) { + super( + name ?? '(MoA)', + 'MoA', + modelIds, + 128_000, + () => 0, + () => 0, + ); + if (providedMediator) this.mediator = providedMediator(); + if (providedDebateLLMs) { + this.llms = providedDebateLLMs.map((factory) => factory()); + // this.model = this.llms.map((llm) => llm.) + } + if (modelIds?.includes('|')) { + this.model = modelIds; + try { + const parts = modelIds.split('|'); + if (parts.length > 1) { + // Set the mediator + this.mediator = getLLM(parts[0]); + + // Set the LLMs + this.llms = parts.slice(1).map((llmId) => getLLM(llmId)); + } else { + throw new Error(); + } + } catch (e) { + throw new Error(`Invalid model string format for MoA ${modelIds}`); + } + } + } + + isConfigured(): boolean { + return this.mediator.isConfigured() && this.llms.findIndex((llm) => !llm.isConfigured()) === -1; + } + + getModel(): string { + return `${this.mediator.getId()}|${this.llms.map((llm) => llm.getId()).join('|')}`; + } + + protected supportsGenerateTextFromMessages(): boolean { + return true; + } + + protected async generateTextFromMessages(llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise { + const readOnlyMessages = llmMessages as ReadonlyArray>; + logger.info('Initial response...'); + const initialResponses: string[] = await this.generateInitialResponses(readOnlyMessages, opts); + logger.info('Debating...'); + const debatedResponses = await this.multiAgentDebate(readOnlyMessages, initialResponses, opts); + logger.info('Mediating response...'); + return this.mergeBestResponses(readOnlyMessages, debatedResponses); + } + + private async generateInitialResponses(llmMessages: ReadonlyArray>, opts?: GenerateTextOptions): Promise { + return Promise.all(this.llms.map((llm) => llm.generateText(llmMessages, { ...opts, temperature: 1 }))); + } + + private async multiAgentDebate( + llmMessages: ReadonlyArray>, + responses: string[], + opts?: GenerateTextOptions, + rounds = 0, + ): Promise { + let debatedResponses = responses; + const userMessage = userContentText(llmMessages[llmMessages.length - 1].content); + for (let round = 1; round < rounds; round++) { + logger.info(`Round ${round}...`); + debatedResponses = await Promise.all( + this.llms.map((llm, index) => { + const leftNeighborIndex = (index - 1 + this.llms.length) % this.llms.length; + const rightNeighborIndex = (index + 1) % this.llms.length; + const newUserPrompt = `\n${userMessage}\n\n\n\n${responses[index]}\n\n +Following are responses generated by other assistants:\n\n${responses[leftNeighborIndex]}\n\n\n\n${responses[rightNeighborIndex]}\n\n +Use the insights from all the responses to refine and update your response to the user message. +Do not mention the multiple responses provided. 
+Ensure any relevant response formatting instructions are followed.`; + + const debateMessages: LlmMessage[] = [...llmMessages]; + debateMessages[debateMessages.length - 1] = { role: 'user', content: newUserPrompt }; + // const debateMessages: LlmMessage[] = [...llmMessages, { role: 'user', content: newUserPrompt }]; + return llm.generateText(debateMessages, opts); + }), + ); + } + + return debatedResponses; + } + + private async mergeBestResponses(llmMessages: ReadonlyArray>, responses: string[], systemPrompt?: string): Promise { + // TODO convert content to string + const originalMessage = userContentText(llmMessages[llmMessages.length - 1].content); + const mergePrompt = `\n${originalMessage}\n + +Following are responses generated by other assistants: +${responses.map((response, index) => `\n${response}\n`).join('\n\n')} + +Look at the again and use the insights from all the assistant responses to provide a final response. Do not mention the multiple responses provided. +Answer directly to the original user message and ensure any relevant response formatting instructions are followed. + `; + const mergedMessages: LlmMessage[] = [...llmMessages]; + mergedMessages[mergedMessages.length - 1] = { role: 'user', content: mergePrompt }; + const generation = this.mediator.generateText(mergedMessages, { temperature: 0.7 }); + logger.info('Merging best response...'); + return await generation; + } +} diff --git a/src/llm/responseParsers.test.ts b/src/llm/responseParsers.test.ts index 1c24975e..7fbf5ec0 100644 --- a/src/llm/responseParsers.test.ts +++ b/src/llm/responseParsers.test.ts @@ -1,6 +1,6 @@ import { expect } from 'chai'; -import { extractJsonResult, extractStringResult, parseFunctionCallsXml } from './responseParsers'; +import { extractJsonResult, extractTag, parseFunctionCallsXml } from './responseParsers'; describe('responseParsers', () => { describe('extractJsonResult', () => { @@ -60,12 +60,12 @@ describe('responseParsers', () => { }); }); - describe('extractStringResult', () => { + describe('extractTagResult', () => { it('Should extract contents in tag', async () => { const result = ` Some result `; - const object = extractStringResult(result); + const object = extractTag(result, 'result'); expect(object).to.deep.equal('Some result'); }); }); diff --git a/src/llm/responseParsers.ts b/src/llm/responseParsers.ts index 5866633d..d78ab803 100644 --- a/src/llm/responseParsers.ts +++ b/src/llm/responseParsers.ts @@ -106,17 +106,18 @@ export function extractJsonResult(rawText: string): any { } /** - * Extracts the text within tags + * Extracts the text within tags * @param response response from the LLM + * @param tagName the name of the XML tag to extract the contents of */ -export function extractStringResult(response: string): any { - const index = response.lastIndexOf(''); - if (index < 0) throw new Error('Could not find in response'); +export function extractTag(response: string, tagName: string): any { + const index = response.lastIndexOf(`<${tagName}>`); + if (index < 0) throw new Error(`Could not find <${tagName}> in response`); const resultText = response.slice(index); - const regexXml = /(.*)<\/result>/is; + const regexXml = new RegExp(`<${tagName}>(.*)<\/${tagName}>`, 'is'); const matchXml = regexXml.exec(resultText); - if (!matchXml) throw new Error(`Could not find in the response \n${resultText}`); + if (!matchXml) throw new Error(`Could not find <${tagName}> in the response \n${resultText}`); return matchXml[1].trim(); } diff --git a/src/llm/services/ai-llm.ts 
b/src/llm/services/ai-llm.ts index 580cdd92..f918c516 100644 --- a/src/llm/services/ai-llm.ts +++ b/src/llm/services/ai-llm.ts @@ -48,6 +48,9 @@ export abstract class AiLLM extends BaseLLM { return withActiveSpan(`generateTextFromMessages ${opts?.id ?? ''}`, async (span) => { const messages: CoreMessage[] = this.processMessages(llmMessages); + // Gemini Flash 2.0 thinking max is about 42 + if (opts?.topK > 40) opts.topK = 40; + const prompt = messages.map((m) => m.content).join('\n'); span.setAttributes({ inputChars: prompt.length, @@ -74,6 +77,7 @@ export abstract class AiLLM extends BaseLLM { frequencyPenalty: opts?.frequencyPenalty, presencePenalty: opts?.presencePenalty, stopSequences: opts?.stopSequences, + maxRetries: opts?.maxRetries, }); const responseText = result.text; diff --git a/src/llm/services/cerebras.ts b/src/llm/services/cerebras.ts index e7a42c91..3b2c7ed6 100644 --- a/src/llm/services/cerebras.ts +++ b/src/llm/services/cerebras.ts @@ -1,5 +1,6 @@ import { createCerebras } from '@ai-sdk/cerebras'; import { OpenAIProvider } from '@ai-sdk/openai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { currentUser } from '#user/userService/userContext'; import { envVar } from '#utils/env-var'; import { LLM } from '../llm'; @@ -15,42 +16,18 @@ export function cerebrasLLMRegistry(): Record LLM> { } export function cerebrasLlama3_8b(): LLM { - return new CerebrasLLM( - 'Llama 3.1 8b (Cerebras)', - 'llama3.1-8b', - 8_192, - (input: string) => 0, //(input.length * 0.05) / (1_000_000 * 4), - (output: string) => 0, //(output.length * 0.08) / (1_000_000 * 4), - 0, - 0, - ); + return new CerebrasLLM('Llama 3.1 8b (Cerebras)', 'llama3.1-8b', 8_192, perMilTokens(0.1), perMilTokens(0.1)); } export function cerebrasLlama3_3_70b(): LLM { - return new CerebrasLLM( - 'Llama 3.3 70b (Cerebras)', - 'llama-3.3-70b', - 8_192, - (input: string) => 0, //(input.length * 0.05) / (1_000_000 * 4), - (output: string) => 0, //(output.length * 0.08) / (1_000_000 * 4), - 0.6, - 0.6, - ); + return new CerebrasLLM('Llama 3.3 70b (Cerebras)', 'llama-3.3-70b', 8_192, perMilTokens(0.85), perMilTokens(1.2)); } /** * https://inference-docs.cerebras.ai/introduction */ export class CerebrasLLM extends AiLLM { - constructor( - displayName: string, - model: string, - maxInputTokens: number, - calculateInputCost: (input: string) => number, - calculateOutputCost: (output: string) => number, - private costPerMillionInputTokens: number, - private costPerMillionOutputTokens: number, - ) { + constructor(displayName: string, model: string, maxInputTokens: number, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, CEREBRAS_SERVICE, model, maxInputTokens, calculateInputCost, calculateOutputCost); } diff --git a/src/llm/services/deepinfra.ts b/src/llm/services/deepinfra.ts index 59b28a0c..992d5f5b 100644 --- a/src/llm/services/deepinfra.ts +++ b/src/llm/services/deepinfra.ts @@ -1,4 +1,5 @@ import { OpenAIProvider, createOpenAI } from '@ai-sdk/openai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; import { currentUser } from '#user/userService/userContext'; import { LLM } from '../llm'; @@ -6,13 +7,7 @@ import { LLM } from '../llm'; export const DEEPINFRA_SERVICE = 'deepinfra'; export class Deepinfra extends AiLLM { - constructor( - displayName: string, - model: string, - maxTokens: number, - calculateInputCost: (input: string) => number, - 
calculateOutputCost: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxTokens: number, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, DEEPINFRA_SERVICE, model, maxTokens, calculateInputCost, calculateOutputCost); } @@ -32,44 +27,36 @@ export class Deepinfra extends AiLLM { return this.aiProvider; } } - +// https://deepinfra.com/models/text-generation export function deepinfraLLMRegistry(): Record LLM> { return { [`${DEEPINFRA_SERVICE}:Qwen/QwQ-32B-Preview`]: deepinfraQwQ_32B, [`${DEEPINFRA_SERVICE}:Qwen/Qwen2.5-Coder-32B-Instruct`]: deepinfraQwen2_5_Coder32B, [`${DEEPINFRA_SERVICE}:Qwen/Qwen2.5-72B-Instruct`]: deepinfraQwen2_5_72B, + [`${DEEPINFRA_SERVICE}:deepseek-ai/DeepSeek-R1`]: deepinfraDeepSeekR1, + [`${DEEPINFRA_SERVICE}:deepseek-ai/DeepSeek-R1-Distill-Llama-70B`]: deepinfraDeepSeekR1_Distill_Llama70b, }; } // https://deepinfra.com/Qwen/QwQ-32B-Preview export function deepinfraQwQ_32B(): LLM { - return new Deepinfra( - 'QwQ-32B-Preview (deepinfra)', - 'Qwen/QwQ-32B-Preview', - 32_768, - (input: string) => (input.length * 0.15) / 1_000_000 / 4, - (output: string) => (output.length * 0.6) / 1_000_000 / 4, - ); + return new Deepinfra('QwQ-32B-Preview (deepinfra)', 'Qwen/QwQ-32B-Preview', 32_768, perMilTokens(0.15), perMilTokens(0.6)); +} + +export function deepinfraDeepSeekR1(): LLM { + return new Deepinfra('DeepSeek R1 (deepinfra)', 'deepseek-ai/DeepSeek-R1', 15_000, perMilTokens(0.85), perMilTokens(2.5)); +} + +export function deepinfraDeepSeekR1_Distill_Llama70b(): LLM { + return new Deepinfra('DeepSeek R1 Llama 70b (deepinfra)', 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 128_000, perMilTokens(0.23), perMilTokens(0.69)); } // https://deepinfra.com/Qwen/Qwen2.5-Coder-32B-Instruct export function deepinfraQwen2_5_Coder32B(): LLM { - return new Deepinfra( - 'Qwen2.5-Coder-32B-Instruct (deepinfra)', - 'Qwen/Qwen2.5-Coder-32B-Instruct', - 32_768, - (input: string) => (input.length * 0.08) / 1_000_000 / 4, - (output: string) => (output.length * 0.18) / 1_000_000 / 4, - ); + return new Deepinfra('Qwen2.5-Coder-32B-Instruct (deepinfra)', 'Qwen/Qwen2.5-Coder-32B-Instruct', 32_768, perMilTokens(0.07), perMilTokens(0.16)); } // https://deepinfra.com/Qwen/Qwen2.5-72B-Instruct export function deepinfraQwen2_5_72B(): LLM { - return new Deepinfra( - 'Qwen2.5-72B-Instruct (deepinfra)', - 'Qwen/Qwen2.5-72B-Instruct', - 32_768, - (input: string) => (input.length * 0.23) / 1_000_000 / 4, - (output: string) => (output.length * 0.4) / 1_000_000 / 4, - ); + return new Deepinfra('Qwen2.5-72B-Instruct (deepinfra)', 'Qwen/Qwen2.5-72B-Instruct', 32_768, perMilTokens(0.23), perMilTokens(0.4)); } diff --git a/src/llm/services/deepseek.ts b/src/llm/services/deepseek.ts index 0d215b1e..d65dc877 100644 --- a/src/llm/services/deepseek.ts +++ b/src/llm/services/deepseek.ts @@ -1,4 +1,5 @@ import { DeepSeekProvider, createDeepSeek } from '@ai-sdk/deepseek'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { currentUser } from '#user/userService/userContext'; import { envVar } from '#utils/env-var'; import { LLM } from '../llm'; @@ -9,28 +10,16 @@ export const DEEPSEEK_SERVICE = 'deepseek'; export function deepseekLLMRegistry(): Record LLM> { return { [`${DEEPSEEK_SERVICE}:deepseek-chat`]: deepSeekV3, - [`${DEEPSEEK_SERVICE}:deepseek-r1`]: deepSeekR1, + [`${DEEPSEEK_SERVICE}:deepseek-reasoner`]: deepSeekR1, }; } export function deepSeekV3(): LLM { - return new DeepSeekLLM( - 
'DeepSeek v3', - 'deepseek-chat', - 64000, - (input: string) => (input.length * 0.14) / (1_000_000 * 3.5), - (output: string) => (output.length * 0.28) / (1_000_000 * 3.5), - ); + return new DeepSeekLLM('DeepSeek v3', 'deepseek-chat', 64000, perMilTokens(0.14), perMilTokens(0.28)); } export function deepSeekR1(): LLM { - return new DeepSeekLLM( - 'DeepSeek R1', - 'deepseek-r1', - 64000, - (input: string) => (input.length * 0.55) / (1_000_000 * 3.5), - (output: string) => (output.length * 2.19) / (1_000_000 * 3.5), - ); + return new DeepSeekLLM('DeepSeek R1', 'deepseek-reasoner', 64000, perMilTokens(0.55), perMilTokens(2.19)); } /** @@ -38,13 +27,7 @@ export function deepSeekR1(): LLM { * @see https://platform.deepseek.com/api-docs/api/create-chat-completion */ export class DeepSeekLLM extends AiLLM { - constructor( - displayName: string, - model: string, - maxTokens: number, - inputCostPerToken: (input: string) => number, - outputCostPerToken: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxTokens: number, inputCostPerToken: InputCostFunction, outputCostPerToken: OutputCostFunction) { super(displayName, DEEPSEEK_SERVICE, model, maxTokens, inputCostPerToken, outputCostPerToken); } diff --git a/src/llm/services/fireworks.ts b/src/llm/services/fireworks.ts index b1260606..7a08555e 100644 --- a/src/llm/services/fireworks.ts +++ b/src/llm/services/fireworks.ts @@ -1,4 +1,5 @@ import { OpenAIProvider, createOpenAI } from '@ai-sdk/openai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; import { currentUser } from '#user/userService/userContext'; import { LLM } from '../llm'; @@ -6,13 +7,7 @@ import { LLM } from '../llm'; export const FIREWORKS_SERVICE = 'fireworks'; export class Fireworks extends AiLLM { - constructor( - displayName: string, - model: string, - maxTokens: number, - calculateInputCost: (input: string) => number, - calculateOutputCost: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxTokens: number, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, FIREWORKS_SERVICE, model, maxTokens, calculateInputCost, calculateOutputCost); } @@ -42,41 +37,17 @@ export function fireworksLLMRegistry(): Record LLM> { } export function fireworksLlama3_70B(): LLM { - return new Fireworks( - 'LLama3 70b-i (Fireworks)', - 'accounts/fireworks/models/llama-v3p1-70b-instruct', - 131_072, - (input: string) => (input.length * 0.9) / 1_000_000 / 4, - (output: string) => (output.length * 0.9) / 1_000_000 / 4, - ); + return new Fireworks('LLama3 70b-i (Fireworks)', 'accounts/fireworks/models/llama-v3p1-70b-instruct', 131_072, perMilTokens(0.9), perMilTokens(0.9)); } export function fireworksLlama3_405B(): LLM { - return new Fireworks( - 'LLama3 405b-i (Fireworks)', - 'accounts/fireworks/models/llama-v3p1-405b-instruct', - 131_072, - (input: string) => (input.length * 3) / 1_000_000 / 4, - (output: string) => (output.length * 3) / 1_000_000 / 4, - ); + return new Fireworks('LLama3 405b-i (Fireworks)', 'accounts/fireworks/models/llama-v3p1-405b-instruct', 131_072, perMilTokens(3), perMilTokens(3)); } export function fireworksDeepSeek(): LLM { - return new Fireworks( - 'DeepSeek 3 (Fireworks)', - 'accounts/fireworks/models/deepseek-v3', - 131_072, - (input: string) => (input.length * 0.9) / 1_000_000 / 4, - (output: string) => (output.length * 0.9) / 1_000_000 / 4, - ); + return new Fireworks('DeepSeek 3 
(Fireworks)', 'accounts/fireworks/models/deepseek-v3', 131_072, perMilTokens(0.9), perMilTokens(0.9)); } export function fireworksDeepSeekR1(): LLM { - return new Fireworks( - 'DeepSeek R1 (Fireworks)', - 'accounts/fireworks/models/deepseek-r1', - 131_072, - (input: string) => (input.length * 8) / 1_000_000 / 4, - (output: string) => (output.length * 8) / 1_000_000 / 4, - ); + return new Fireworks('DeepSeek R1 (Fireworks)', 'accounts/fireworks/models/deepseek-r1', 131_072, perMilTokens(8), perMilTokens(8)); } diff --git a/src/llm/services/groq.ts b/src/llm/services/groq.ts index b810ea85..d1882026 100644 --- a/src/llm/services/groq.ts +++ b/src/llm/services/groq.ts @@ -1,4 +1,5 @@ import { GroqProvider, createGroq } from '@ai-sdk/groq'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; import { currentUser } from '#user/userService/userContext'; import { LLM } from '../llm'; @@ -16,26 +17,14 @@ export function groqLLMRegistry(): Record LLM> { // https://console.groq.com/docs/models export function groqLlama3_3_70B(): LLM { - return new GroqLLM( - 'Llama3.3 70b (Groq)', - 'llama-3.3-70b-versatile', - 131_072, - (input: string) => (input.length * 0.59) / (1_000_000 * 4), - (output: string) => (output.length * 0.79) / (1_000_000 * 4), - ); + return new GroqLLM('Llama3.3 70b (Groq)', 'llama-3.3-70b-versatile', 131_072, perMilTokens(0.59), perMilTokens(0.79)); } /** * https://wow.groq.com/ */ export class GroqLLM extends AiLLM { - constructor( - displayName: string, - model: string, - maxTokens: number, - calculateInputCost: (input: string) => number, - calculateOutputCost: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxTokens: number, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, GROQ_SERVICE, model, maxTokens, calculateInputCost, calculateOutputCost); } diff --git a/src/llm/services/llm.int.ts b/src/llm/services/llm.int.ts index 80344bf5..1f2651ca 100644 --- a/src/llm/services/llm.int.ts +++ b/src/llm/services/llm.int.ts @@ -8,6 +8,7 @@ import { deepinfraQwQ_32B, deepinfraQwen2_5_Coder32B } from '#llm/services/deepi import { deepSeekV3 } from '#llm/services/deepseek'; import { fireworksLlama3_70B } from '#llm/services/fireworks'; import { groqLlama3_3_70B } from '#llm/services/groq'; +import { nebiusDeepSeekR1 } from '#llm/services/nebius'; import { Ollama_Phi3 } from '#llm/services/ollama'; import { GPT4oMini } from '#llm/services/openai'; import { togetherLlama3_70B } from '#llm/services/together'; @@ -154,6 +155,15 @@ describe('LLMs', () => { }); }); + describe('Nebius', () => { + const llm = nebiusDeepSeekR1(); + + it('should generateText', async () => { + const response = await llm.generateText(SKY_PROMPT, { temperature: 0 }); + expect(response.toLowerCase()).to.include('blue'); + }); + }); + describe('Ollama', () => { const llm = Ollama_Phi3(); diff --git a/src/llm/services/nebius.ts b/src/llm/services/nebius.ts new file mode 100644 index 00000000..997d3611 --- /dev/null +++ b/src/llm/services/nebius.ts @@ -0,0 +1,35 @@ +import { OpenAIProvider, createOpenAI } from '@ai-sdk/openai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; +import { AiLLM } from '#llm/services/ai-llm'; +import { currentUser } from '#user/userService/userContext'; +import { LLM } from '../llm'; + +export const NEBIUS_SERVICE = 'nebius'; + +export function nebiusLLMRegistry(): Record LLM> { + return { + 
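// Registry keys follow the 'service:model' id convention used by the llmFactory registries
+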
'nebius:deepseek-ai/DeepSeek-R1': nebiusDeepSeekR1, + }; +} + +export function nebiusDeepSeekR1() { + return new NebiusLLM('DeepSeek R1 (Nebius)', 'deepseek-ai/DeepSeek-R1', perMilTokens(0.8), perMilTokens(2.4)); +} + +export class NebiusLLM extends AiLLM { + constructor(displayName: string, model: string, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { + super(displayName, NEBIUS_SERVICE, model, 128_000, calculateInputCost, calculateOutputCost); + } + + protected apiKey(): string { + return currentUser().llmConfig.nebiusKey || process.env.NEBIUS_API_KEY; + } + + provider(): OpenAIProvider { + this.aiProvider ??= createOpenAI({ + baseURL: 'https://api.studio.nebius.ai/v1/', + apiKey: this.apiKey(), + }); + return this.aiProvider; + } +} diff --git a/src/llm/services/openai.ts b/src/llm/services/openai.ts index 49dcb60c..91495b1a 100644 --- a/src/llm/services/openai.ts +++ b/src/llm/services/openai.ts @@ -1,6 +1,6 @@ import { OpenAIProvider, createOpenAI } from '@ai-sdk/openai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; -import { logger } from '#o11y/logger'; import { currentUser } from '#user/userService/userContext'; import { GenerateTextOptions, LLM, LlmMessage } from '../llm'; @@ -8,59 +8,52 @@ export const OPENAI_SERVICE = 'openai'; export function openAiLLMRegistry(): Record LLM> { return { - 'openai:gpt-4o': () => openaiLLmFromModel('gpt-4o'), - 'openai:gpt-4o-mini': () => openaiLLmFromModel('gpt-4o-mini'), - 'openai:o1-preview': () => openaiLLmFromModel('o1-preview'), - 'openai:o1-mini': () => openaiLLmFromModel('o1-mini'), + 'openai:gpt-4o': () => GPT4o(), + 'openai:gpt-4o-mini': () => GPT4oMini(), + 'openai:o1-preview': () => openAIo1Preview(), + 'openai:o1': () => openAIo1(), + 'openai:o1-mini': () => openAIo1mini(), + 'openai:o3-mini': () => openAIo3mini(), }; } -export function openaiLLmFromModel(model: string): LLM { - if (model.startsWith('gpt-4o-mini')) return GPT4oMini(); - if (model.startsWith('gpt-4o')) return GPT4o(); - if (model.startsWith('o1-preview')) return openAIo1(); - if (model.startsWith('o1-mini')) return openAIo1mini(); - throw new Error(`Unsupported ${OPENAI_SERVICE} model: ${model}`); +export function openAIo1() { + return new OpenAI('OpenAI o1', 'o1', inputCost(15), perMilTokens(60)); } -export function openAIo1() { - return new OpenAI( - 'OpenAI o1 preview', - 'o1-preview', - (input: string) => (input.length * 15) / 1_000_000, - (output: string) => (output.length * 60) / (1_000_000 * 4), - ); +export function openAIo1Preview() { + return new OpenAI('OpenAI o1 preview', 'o1-preview', inputCost(15), perMilTokens(60)); } export function openAIo1mini() { - return new OpenAI( - 'OpenAI o1-mini', - 'o1-mini', - (input: string) => (input.length * 3) / 1_000_000, - (output: string) => (output.length * 12) / (1_000_000 * 4), - ); + return new OpenAI('OpenAI o1-mini', 'o1-mini', inputCost(3), perMilTokens(12)); +} + +export function openAIo3mini() { + return new OpenAI('OpenAI o3-mini', 'o3-mini', inputCost(1.1), perMilTokens(4.4)); } export function GPT4o() { - return new OpenAI( - 'GPT4o', - 'gpt-4o', - (input: string) => (input.length * 2.5) / 1_000_000, - (output: string) => (output.length * 10) / (1_000_000 * 4), - ); + return new OpenAI('GPT4o', 'gpt-4o', inputCost(2.5), perMilTokens(10)); } export function GPT4oMini() { - return new OpenAI( - 'GPT4o mini', - 'gpt-4o-mini', - (input: string) => (input.length * 0.15) / (1_000_000 * 4), - 
(output: string) => (output.length * 0.6) / (1_000_000 * 4), - ); + return new OpenAI('GPT4o mini', 'gpt-4o-mini', inputCost(0.15), perMilTokens(0.6)); +} + +// https://sdk.vercel.ai/providers/ai-sdk-providers/openai#prompt-caching +function inputCost(dollarsPerMillionTokens: number): InputCostFunction { + return (input: string, tokens: number, experimental_providerMetadata: any) => { + const cachedPromptTokens = experimental_providerMetadata?.openai?.cachedPromptTokens; + if (cachedPromptTokens) { + return ((tokens - cachedPromptTokens) * dollarsPerMillionTokens) / 1_000_000 + (cachedPromptTokens * dollarsPerMillionTokens) / 2 / 1_000_000; + } + return (tokens * dollarsPerMillionTokens) / 1_000_000; + }; } export class OpenAI extends AiLLM { - constructor(displayName: string, model: string, calculateInputCost: (input: string) => number, calculateOutputCost: (output: string) => number) { + constructor(displayName: string, model: string, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, OPENAI_SERVICE, model, 128_000, calculateInputCost, calculateOutputCost); } @@ -69,11 +62,9 @@ export class OpenAI extends AiLLM { } provider(): OpenAIProvider { - if (!this.aiProvider) { - this.aiProvider = createOpenAI({ - apiKey: this.apiKey(), - }); - } + this.aiProvider ??= createOpenAI({ + apiKey: this.apiKey(), + }); return this.aiProvider; } diff --git a/src/llm/services/perplexity-llm.ts b/src/llm/services/perplexity-llm.ts index 587e7512..23a76e65 100644 --- a/src/llm/services/perplexity-llm.ts +++ b/src/llm/services/perplexity-llm.ts @@ -22,30 +22,41 @@ llama-3.1-sonar-huge-128k-online $5 $5 export function perplexityLLMRegistry(): Record LLM> { return { - [`${PERPLEXITY_SERVICE}:llama-3.1-sonar-large-128k-online`]: perplexityLargeLLM, - [`${PERPLEXITY_SERVICE}:llama-3.1-sonar-huge-128k-online`]: perplexityHugeLLM, + [`${PERPLEXITY_SERVICE}:sonar`]: perplexityLLM, + [`${PERPLEXITY_SERVICE}:sonar-pro`]: perplexityProLLM, }; } -export function perplexityLargeLLM(): LLM { +export function perplexityLLM(): LLM { return new PerplexityLLM( - 'Perplexity Large', - 'llama-3.1-sonar-large-128k-online', - 128000, // maxTokens + 'Perplexity', + 'sonar', + 127_000, // maxTokens 0.000001, // costPerPromptToken ($1 per million tokens) 0.000001, // costPerCompletionToken - 0.005, // onlineCost ($5 per 1000 requests) + 0.005, // 1 search ($5 per 1000 requests) ); } -export function perplexityHugeLLM(): LLM { +export function perplexityProLLM(): LLM { return new PerplexityLLM( - 'Perplexity Huge', - 'llama-3.1-sonar-huge-128k-online', - 128000, // maxTokens + 'Perplexity Pro', + 'sonar-pro', + 200_000, // maxTokens 0.000005, // costPerPromptToken ($5 per million tokens) 0.000005, // costPerCompletionToken - 0.005, // onlineCost ($5 per 1000 requests) + 0.015, // 3 searches ($5 per 1000 requests) + ); +} + +export function perplexityReasoningLLM(): LLM { + return new PerplexityLLM( + 'Perplexity Reasoning', + 'sonar-reasoning', + 127_000, // maxTokens + 0.000001, // costPerPromptToken ($1 per million tokens) + 0.000005, // costPerCompletionToken + 0.005, // 1 search ($5 per 1000 requests) ); } diff --git a/src/llm/services/together.ts b/src/llm/services/together.ts index 50864930..335d1dcd 100644 --- a/src/llm/services/together.ts +++ b/src/llm/services/together.ts @@ -1,5 +1,6 @@ import { TogetherAIProvider, createTogetherAI } from '@ai-sdk/togetherai'; import { LanguageModelV1 } from 'ai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from 
'#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; import { currentUser } from '#user/userService/userContext'; import { LLM } from '../llm'; @@ -9,18 +10,18 @@ export const TOGETHER_SERVICE = 'together'; export function togetherLLMRegistry(): Record LLM> { return { [`${TOGETHER_SERVICE}:meta-llama/Llama-3-70b-chat-hf`]: () => togetherLlama3_70B(), + [`${TOGETHER_SERVICE}:deepseek-ai/DeepSeek-R1`]: () => togetherDeepSeekR1(), }; } export function togetherLlama3_70B(): LLM { - return new TogetherLLM( - 'Llama3 70b (Together)', - 'meta-llama/Llama-3-70b-chat-hf', - 8000, - (input: string) => (input.length * 0.9) / 1_000_000, - (output: string) => (output.length * 0.9) / 1_000_000, - ); + return new TogetherLLM('Llama3 70b (Together)', 'meta-llama/Llama-3-70b-chat-hf', 8000, perMilTokens(0.9), perMilTokens(0.9)); } + +export function togetherDeepSeekR1(): LLM { + return new TogetherLLM('DeepSeek R1 (Together)', 'deepseek-ai/DeepSeek-R1', 64000, perMilTokens(8), perMilTokens(8)); +} + type TogetherAIProviderV1 = TogetherAIProvider & { languageModel: (modelId: string) => LanguageModelV1; }; @@ -28,13 +29,7 @@ type TogetherAIProviderV1 = TogetherAIProvider & { * Together AI models */ export class TogetherLLM extends AiLLM { - constructor( - displayName: string, - model: string, - maxTokens: number, - inputCostPerToken: (input: string) => number, - outputCostPerToken: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxTokens: number, inputCostPerToken: InputCostFunction, outputCostPerToken: OutputCostFunction) { super(displayName, TOGETHER_SERVICE, model, maxTokens, inputCostPerToken, outputCostPerToken); } diff --git a/src/llm/services/vertexai.ts b/src/llm/services/vertexai.ts index ecf481a1..d19c7bc5 100644 --- a/src/llm/services/vertexai.ts +++ b/src/llm/services/vertexai.ts @@ -2,6 +2,7 @@ import { GoogleVertexProvider, createVertex } from '@ai-sdk/google-vertex'; import { HarmBlockThreshold, HarmCategory, SafetySetting } from '@google-cloud/vertexai'; import axios from 'axios'; import { AgentLLMs } from '#agent/agentContextTypes'; +import { InputCostFunction, OutputCostFunction } from '#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; import { currentUser } from '#user/userService/userContext'; import { envVar } from '#utils/env-var'; @@ -71,7 +72,7 @@ export function Gemini_1_5_Flash() { // export function Gemini_1_5_Flash_8B() { // return new VertexLLM( // 'Gemini 1.5 Flash 8B', -// 'gemini-1.5-flash-8b', +// 'gemini-1.5-flash-8b', // gemini-1.5-flash-8b, alias that points to gemini-1.5-flash-8b-001 // 1_000_000, // (input: string) => (input.length * 0.000125) / 1000, // (output: string) => (output.length * 0.000375) / 1000, @@ -91,7 +92,7 @@ export function Gemini_2_0_Flash() { export function Gemini_2_0_Flash_Thinking() { return new VertexLLM( 'Gemini 2.0 Flash Thinking Experimental', - 'gemini-2.0-flash-thinking-exp-1219', + 'gemini-2.0-flash-thinking-exp-1219', // gemini-2.0-flash-thinking-exp-01-21 1_000_000, (input: string) => (input.length * 0.000125) / 1000, (output: string) => (output.length * 0.000375) / 1000, @@ -112,13 +113,7 @@ export function Vertex_Llama3_405b() { * Vertex AI models - Gemini */ class VertexLLM extends AiLLM { - constructor( - displayName: string, - model: string, - maxInputToken: number, - calculateInputCost: (input: string) => number, - calculateOutputCost: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxInputToken: number, calculateInputCost: 
InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, VERTEX_SERVICE, model, maxInputToken, calculateInputCost, calculateOutputCost); } diff --git a/src/llm/services/xai.ts b/src/llm/services/xai.ts index 7d2fda6b..4b3db94a 100644 --- a/src/llm/services/xai.ts +++ b/src/llm/services/xai.ts @@ -1,4 +1,5 @@ import { OpenAIProvider, createOpenAI } from '@ai-sdk/openai'; +import { InputCostFunction, OutputCostFunction, perMilTokens } from '#llm/base-llm'; import { AiLLM } from '#llm/services/ai-llm'; import { currentUser } from '#user/userService/userContext'; import { LLM } from '../llm'; @@ -6,13 +7,7 @@ import { LLM } from '../llm'; export const XAI_SERVICE = 'xai'; export class XAI extends AiLLM { - constructor( - displayName: string, - model: string, - maxTokens: number, - calculateInputCost: (input: string) => number, - calculateOutputCost: (output: string) => number, - ) { + constructor(displayName: string, model: string, maxTokens: number, calculateInputCost: InputCostFunction, calculateOutputCost: OutputCostFunction) { super(displayName, XAI_SERVICE, model, maxTokens, calculateInputCost, calculateOutputCost); } @@ -38,11 +33,5 @@ export function xaiLLMRegistry(): Record LLM> { } export function xai_GrokBeta(): LLM { - return new XAI( - 'Grok beta', - 'grok-beta', - 131_072, - (input: string) => (input.length * 0.9) / 1_000_000 / 4, - (output: string) => (output.length * 0.9) / 1_000_000 / 4, - ); + return new XAI('Grok beta', 'grok-beta', 131_072, perMilTokens(0.9), perMilTokens(0.9)); } diff --git a/src/modules/firestore/firestoreUserService.ts b/src/modules/firestore/firestoreUserService.ts index 6c7ee0a5..21dd4c4e 100644 --- a/src/modules/firestore/firestoreUserService.ts +++ b/src/modules/firestore/firestoreUserService.ts @@ -78,7 +78,7 @@ export class FirestoreUserService implements UserService { }; } if (!user.chat.enabledLLMs) user.chat.enabledLLMs = {}; - if (!user.chat.temperature) user.chat.temperature = 1; + if (!user.chat.temperature) user.chat.temperature = 0.7; if (!user.chat.topP) user.chat.topP = 1; if (!user.chat.topK) user.chat.topK = 50; if (!user.chat.frequencyPenalty) user.chat.frequencyPenalty = 0; diff --git a/src/modules/slack/slackChatBotService.ts b/src/modules/slack/slackChatBotService.ts index 5681d96a..1e0ef280 100644 --- a/src/modules/slack/slackChatBotService.ts +++ b/src/modules/slack/slackChatBotService.ts @@ -16,6 +16,8 @@ import { ChatBotService } from '../../chatBot/chatBotService'; let slackApp: App | undefined; +const CHATBOT_FUNCTIONS: Array any> = [GitLab, GoogleCloud, Perplexity, LlmTools]; + /** * Slack implementation of ChatBotService * Only one Slack workspace can be configured in the application as the Slack App is shared between all instances of this class. @@ -142,7 +144,7 @@ export class SlackChatBotService implements ChatBotService, AgentCompleted { resumeAgentId: `Slack-${threadId}`, initialPrompt: text, llms: defaultLLMs(), - functions: [GitLab, GoogleCloud, Perplexity, LlmTools], + functions: CHATBOT_FUNCTIONS, agentName: `Slack-${threadId}`, systemPrompt: 'You are an AI support agent called Sophia. You are responding to support requests on the company Slack account. Respond in a helpful, concise manner. 
If you encounter an error responding to the request do not provide details of the error to the user, only respond with "Sorry, I\'m having difficulties providing a response to your request"', diff --git a/src/routes/agent/agent-execution-routes.ts b/src/routes/agent/agent-execution-routes.ts index 7448c3fa..199d6ca2 100644 --- a/src/routes/agent/agent-execution-routes.ts +++ b/src/routes/agent/agent-execution-routes.ts @@ -2,6 +2,7 @@ import { Type } from '@sinclair/typebox'; import { LlmFunctions } from '#agent/LlmFunctions'; import { AgentContext } from '#agent/agentContextTypes'; import { cancelAgent, provideFeedback, resumeCompleted, resumeError, resumeHil } from '#agent/agentRunner'; +import { serializeContext } from '#agent/agentSerialization'; import { forceStopAgent } from '#agent/forceStopAgent'; import { runXmlAgent } from '#agent/xmlAgentRunner'; import { send, sendBadRequest } from '#fastify/index'; @@ -46,9 +47,15 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { async (req, reply) => { const { agentId, feedback, executionId } = req.body; - await provideFeedback(agentId, executionId, feedback); - - send(reply, 200); + try { + await provideFeedback(agentId, executionId, feedback); + const updatedAgent = await fastify.agentStateService.load(agentId); + if (!updatedAgent) return sendBadRequest(reply, 'Agent not found'); + send(reply, 200, serializeContext(updatedAgent)); + } catch (error) { + logger.error('Error providing feedback:', error); + sendBadRequest(reply, 'Error providing feedback'); + } }, ); @@ -68,8 +75,9 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { const { agentId, executionId, feedback } = req.body; await resumeError(agentId, executionId, feedback); - - send(reply, 200); + const updatedAgent = await fastify.agentStateService.load(agentId); + if (!updatedAgent) return sendBadRequest(reply, 'Agent not found'); + send(reply, 200, serializeContext(updatedAgent)); }, ); @@ -89,8 +97,9 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { const { agentId, executionId, feedback } = req.body; await resumeHil(agentId, executionId, feedback); - - send(reply, 200); + const updatedAgent = await fastify.agentStateService.load(agentId); + if (!updatedAgent) return sendBadRequest(reply, 'Agent not found'); + send(reply, 200, serializeContext(updatedAgent)); }, ); @@ -110,7 +119,9 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { const { agentId, executionId, reason } = req.body; await cancelAgent(agentId, executionId, reason); - send(reply, 200); + const updatedAgent = await fastify.agentStateService.load(agentId); + if (!updatedAgent) return sendBadRequest(reply, 'Agent not found'); + send(reply, 200, serializeContext(updatedAgent)); }, ); @@ -131,7 +142,9 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { try { await resumeCompleted(agentId, executionId, instructions); - send(reply, 200); + const updatedAgent = await fastify.agentStateService.load(agentId); + if (!updatedAgent) return sendBadRequest(reply, 'Agent not found'); + send(reply, 200, serializeContext(updatedAgent)); } catch (error) { logger.error(error, 'Error resuming completed agent'); sendBadRequest(reply, 'Error resuming completed agent'); @@ -155,9 +168,7 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { try { const agent = await fastify.agentStateService.load(agentId); - if (!agent) { - throw new Error('Agent not found'); - } + if (!agent) throw new 
Error('Agent not found'); agent.functions = new LlmFunctions(); for (const functionName of functions) { @@ -170,7 +181,8 @@ export async function agentExecutionRoutes(fastify: AppFastifyInstance) { } await fastify.agentStateService.save(agent); - send(reply, 200, { message: 'Agent functions updated successfully' }); + const updatedAgent = await fastify.agentStateService.load(agentId); + send(reply, 200, serializeContext(updatedAgent)); } catch (error) { logger.error('Error updating agent functions:', error); sendBadRequest(reply, 'Error updating agent functions'); diff --git a/src/routes/chat/chat-routes.ts b/src/routes/chat/chat-routes.ts index 1d8d7571..11355292 100644 --- a/src/routes/chat/chat-routes.ts +++ b/src/routes/chat/chat-routes.ts @@ -58,8 +58,7 @@ export async function chatRoutes(fastify: AppFastifyInstance) { const text = typeof userContent === 'string' ? userContent : userContent.find((content) => content.type === 'text')?.text; const titlePromise: Promise | undefined = llm.generateText( - 'The following message is the first message in a new chat conversation. Your task is to create a short title for the conversation. Respond only with the title, nothing else', - text, + `\n${text}\n\n\n\nThe above message is the first message in a new chat conversation. Your task is to create a short title in a few words for the conversation. Respond only with the title, nothing else.`, ); chat.messages.push({ role: 'user', content: userContent, time: Date.now() }); //, cache: cache ? 'ephemeral' : undefined // remove any previous cache marker diff --git a/src/routes/webhooks/gitlab/gitlabRoutes-v1.ts b/src/routes/webhooks/gitlab/gitlabRoutes-v1.ts index 7b8abeef..7d13c864 100644 --- a/src/routes/webhooks/gitlab/gitlabRoutes-v1.ts +++ b/src/routes/webhooks/gitlab/gitlabRoutes-v1.ts @@ -36,25 +36,28 @@ export async function gitlabRoutesV1(fastify: AppFastifyInstance) { if (event.object_attributes.draft) sendSuccess(reply); + const runAsUser = await appContext().userService.getUserByEmail(envVar('GITLAB_REVIEW_USER_EMAIL')); + if (!runAsUser) throw new Error(`Could not find user from env var GITLAB_REVIEW_USER_EMAIL with value ${envVar('GITLAB_REVIEW_USER_EMAIL')}`); + const config: RunAgentConfig = { agentName: `MR review - ${event.object_attributes.title}`, llms: defaultLLMs(), functions: [], - user: await appContext().userService.getUserByEmail(envVar('GITLAB_REVIEW_USER_EMAIL')), + user: runAsUser, initialPrompt: '', humanInLoop: envVarHumanInLoopSettings(), }; const context: AgentContext = createContext(config); - const mergeRequestId = `${event.project.id}, ${event.object_attributes.id}, ${event.object_attributes.title}`; + const mergeRequestId = `project:${event.project.name}, miid:${event.object_attributes.iid}, MR:"${event.object_attributes.title}"`; logger.info(`Agent ${context.agentId} reviewing merge request ${mergeRequestId}`); agentContextStorage.run(context, () => { new GitLab() - .reviewMergeRequest(event.project.id, event.object_attributes.id) + .reviewMergeRequest(event.project.id, event.object_attributes.iid) .then(() => { logger.debug(`Competed review of merge request ${mergeRequestId}`); }) - .catch((error) => logger.error(error, `Error reviewing merge request ${mergeRequestId}`)); + .catch((error) => logger.error(error, `Error reviewing merge request ${mergeRequestId}. 
Message: ${error.message} [error]`)); }); send(reply, 200); diff --git a/src/routes/code/code-routes.ts b/src/routes/workflows/workflow-routes.ts similarity index 93% rename from src/routes/code/code-routes.ts rename to src/routes/workflows/workflow-routes.ts index 303694e7..f2c2b926 100644 --- a/src/routes/code/code-routes.ts +++ b/src/routes/workflows/workflow-routes.ts @@ -5,12 +5,11 @@ import { Type } from '@sinclair/typebox'; import { getFileSystem } from '#agent/agentContextLocalStorage'; import { RunAgentConfig } from '#agent/agentRunner'; import { runAgentWorkflow } from '#agent/agentWorkflowRunner'; -import { ClaudeVertexLLMs } from '#llm/services/anthropic-vertex'; import { defaultLLMs } from '#llm/services/defaultLlms'; import { Gemini_1_5_Flash } from '#llm/services/vertexai'; import { logger } from '#o11y/logger'; import { CodeEditingAgent } from '#swe/codeEditingAgent'; -import { codebaseQuery } from '#swe/discovery/codebaseQuery'; +import { queryWorkflow } from '#swe/discovery/selectFilesAgent'; import { SelectFilesResponse, selectFilesToEdit } from '#swe/discovery/selectFilesToEdit'; import { sophiaDirName, systemDir } from '../../appVars'; import { AppFastifyInstance } from '../../server'; @@ -34,11 +33,11 @@ function findRepositories(dir: string): string[] { return repos; } -export async function codeRoutes(fastify: AppFastifyInstance) { +export async function workflowRoutes(fastify: AppFastifyInstance) { // /get // See https://docs.gitlab.com/ee/user/project/integrations/webhook_events.html#merge-request-events fastify.post( - '/api/code/edit', + '/api/workflows/edit', { schema: { body: Type.Object({ @@ -84,7 +83,7 @@ export async function codeRoutes(fastify: AppFastifyInstance) { ); fastify.post( - '/api/code/query', + '/api/workflows/query', { schema: { body: Type.Object({ @@ -118,7 +117,7 @@ export async function codeRoutes(fastify: AppFastifyInstance) { getFileSystem().setWorkingDirectory(workingDirectory); logger.info(`Working directory is ${getFileSystem().getWorkingDirectory()}`); - response = await codebaseQuery(query); + response = await queryWorkflow(query); }); reply.send({ response }); @@ -130,7 +129,7 @@ export async function codeRoutes(fastify: AppFastifyInstance) { ); fastify.post( - '/api/code/select-files', + '/api/workflows/select-files', { schema: { body: Type.Object({ @@ -171,7 +170,7 @@ export async function codeRoutes(fastify: AppFastifyInstance) { }, ); - fastify.get('/api/code/repositories', async (request, reply) => { + fastify.get('/api/workflows/repositories', async (request, reply) => { try { const workingDirectory = process.cwd(); const gitlabRepos = findRepositories(path.join(systemDir(), 'gitlab')); diff --git a/src/server.ts b/src/server.ts index 4f66fe2a..e8a6946e 100644 --- a/src/server.ts +++ b/src/server.ts @@ -7,13 +7,13 @@ import { agentExecutionRoutes } from './routes/agent/agent-execution-routes'; import { agentStartRoute } from './routes/agent/agent-start-route'; import { authRoutes } from './routes/auth/auth-routes'; import { chatRoutes } from './routes/chat/chat-routes'; -import { codeRoutes } from './routes/code/code-routes'; import { llmCallRoutes } from './routes/llms/llm-call-routes'; import { llmRoutes } from './routes/llms/llm-routes'; import { profileRoute } from './routes/profile/profile-route'; import { codeReviewRoutes } from './routes/scm/codeReviewRoutes'; import { gitlabRoutesV1 } from './routes/webhooks/gitlab/gitlabRoutes-v1'; import { jiraRoutes } from './routes/webhooks/jira/jira-routes'; +import { 
workflowRoutes } from './routes/workflows/workflow-routes'; export interface AppFastifyInstance extends TypeBoxFastifyInstance, ApplicationContext {} @@ -39,7 +39,7 @@ export async function initServer(): Promise { llmCallRoutes, codeReviewRoutes, chatRoutes, - codeRoutes, + workflowRoutes, jiraRoutes, // Add your routes below this line ], diff --git a/src/swe/codeEditingAgent.ts b/src/swe/codeEditingAgent.ts index e991bbb5..7b79591b 100644 --- a/src/swe/codeEditingAgent.ts +++ b/src/swe/codeEditingAgent.ts @@ -7,7 +7,7 @@ import { span } from '#o11y/trace'; import { CompileErrorAnalysis, CompileErrorAnalysisDetails, analyzeCompileErrors } from '#swe/analyzeCompileErrors'; import { SelectedFile, selectFilesAgent } from '#swe/discovery/selectFilesAgent'; import { includeAlternativeAiToolFiles } from '#swe/includeAlternativeAiToolFiles'; -import { getRepositoryOverview, getTopLevelSummary } from '#swe/repoIndexDocBuilder'; +import { getRepositoryOverview, getTopLevelSummary } from '#swe/index/repoIndexDocBuilder'; import { reviewChanges } from '#swe/reviewChanges'; import { supportingInformation } from '#swe/supportingInformation'; import { execCommand, runShellCommand } from '#utils/exec'; @@ -411,9 +411,9 @@ Then respond in following format: @cacheRetry() async extractFilenames(summary: string): Promise { - const filenames = await getFileSystem().listFilesRecursively(); + const filenames = await getFileSystem().getFileSystemTree(); const prompt = buildPrompt({ - information: `\n${filenames.join('\n')}\n`, + information: `\n${filenames}\n`, requirements: summary, action: 'You will respond ONLY in JSON. From the requirements quietly consider which the files may be required to complete the task. You MUST output your answer ONLY as JSON in the format of this example:\n\n{\n files: ["file1", "file2", "file3"]\n}\n', diff --git a/src/swe/codeFunctions.ts b/src/swe/codeFunctions.ts index c5c61781..534d5231 100644 --- a/src/swe/codeFunctions.ts +++ b/src/swe/codeFunctions.ts @@ -1,5 +1,5 @@ import { func, funcClass } from '#functionSchema/functionDecorators'; -import { codebaseQuery } from '#swe/discovery/codebaseQuery'; +import { queryWorkflow } from '#swe/discovery/selectFilesAgent'; import { SelectFilesResponse, selectFilesToEdit } from '#swe/discovery/selectFilesToEdit'; import { getProjectInfo } from '#swe/projectDetection'; import { reviewChanges } from '#swe/reviewChanges'; @@ -12,7 +12,7 @@ export class CodeFunctions { */ @func() async queryRepository(query: string): Promise { - return await codebaseQuery(query); + return await queryWorkflow(query); } /** diff --git a/src/swe/discovery/codebaseQuery.ts b/src/swe/discovery/codebaseQuery.ts index 9a5918a5..56419e06 100644 --- a/src/swe/discovery/codebaseQuery.ts +++ b/src/swe/discovery/codebaseQuery.ts @@ -1,13 +1,18 @@ import { getFileSystem, llms } from '#agent/agentContextLocalStorage'; import { logger } from '#o11y/logger'; +import { getTopLevelSummary } from '#swe/index/repoIndexDocBuilder'; +import { RepositoryMaps, generateRepositoryMaps } from '#swe/index/repositoryMap'; import { ProjectInfo, getProjectInfo } from '#swe/projectDetection'; -import { getTopLevelSummary } from '#swe/repoIndexDocBuilder'; -import { RepositoryMaps, generateRepositoryMaps } from '#swe/repositoryMap'; interface FileSelection { files: string[]; } +/** + * Old ad-hoc query with fixed two passes over the filesystem tree/files + * @deprecated use queryWorkflow instead + * @param query + */ export async function codebaseQuery(query: string): Promise { const 
initialFileSelection = await firstPass(query); const refinedFileSelection = await secondPass(query, initialFileSelection); diff --git a/src/swe/discovery/selectFilesAgent.ts b/src/swe/discovery/selectFilesAgent.ts index 730fb376..da983e7c 100644 --- a/src/swe/discovery/selectFilesAgent.ts +++ b/src/swe/discovery/selectFilesAgent.ts @@ -1,10 +1,10 @@ import path from 'path'; import { getFileSystem, llms } from '#agent/agentContextLocalStorage'; -import { LlmMessage } from '#llm/llm'; +import { LLM, LlmMessage } from '#llm/llm'; import { logger } from '#o11y/logger'; -import { ProjectInfo, detectProjectInfo, getProjectInfo } from '#swe/projectDetection'; -import { getRepositoryOverview } from '#swe/repoIndexDocBuilder'; -import { RepositoryMaps, generateRepositoryMaps } from '#swe/repositoryMap'; +import { getRepositoryOverview } from '#swe/index/repoIndexDocBuilder'; +import { RepositoryMaps, generateRepositoryMaps } from '#swe/index/repositoryMap'; +import { ProjectInfo, detectProjectInfo } from '#swe/projectDetection'; /* Agent which iteratively loads files to find the file set required for a task/query. @@ -68,6 +68,7 @@ Your response must finish in the following format: + @@ -119,7 +120,12 @@ Your response must end with a JSON object wrapped in tags in the followin ]; } -async function generateFileSelectionProcessingResponse(messages: LlmMessage[], pendingFiles: string[]): Promise { +async function generateFileSelectionProcessingResponse( + messages: LlmMessage[], + pendingFiles: string[], + iteration: number, + llm: LLM, +): Promise { const prompt = ` ${(await readFileContents(pendingFiles)).contents} @@ -130,9 +136,13 @@ Respond only as per the Process Files Response Instructions. `; const iterationMessages: LlmMessage[] = [...messages, { role: 'user', content: prompt }]; - return await llms().medium.generateTextWithJson(iterationMessages, { id: 'Select Files iteration' }); + return await llm.generateTextWithJson(iterationMessages, { id: `Select Files iteration ${iteration}` }); } +/** + * Generates the user message that we will add to the conversation, which includes the file contents the LLM wishes to inspect + * @param response + */ async function processedIterativeStepUserPrompt(response: IterationResponse): Promise { const ignored = response.ignoreFiles?.map((s) => s.path) ?? []; const kept = response.keepFiles?.map((s) => s.path) ?? 
[]; @@ -225,7 +235,9 @@ async function selectFilesCore( const maxIterations = 10; let iterationCount = 0; - const initialResponse: InitialResponse = await llms().medium.generateTextWithJson(messages, { id: 'Select Files initial' }); + let llm = llms().medium; + + const initialResponse: InitialResponse = await llm.generateTextWithJson(messages, { id: 'Select Files initial' }); messages.push({ role: 'assistant', content: JSON.stringify(initialResponse) }); let filesToInspect = initialResponse.inspectFiles || []; @@ -233,16 +245,18 @@ async function selectFilesCore( const keptFiles = new Set<{ path: string; reason: string }>(); const ignoredFiles = new Set<{ path: string; reason: string }>(); - while (filesToInspect.length > 0) { + while (true) { iterationCount++; if (iterationCount > maxIterations) throw new Error('Maximum interaction iterations reached.'); - const response: IterationResponse = await generateFileSelectionProcessingResponse(messages, filesToInspect); + const response: IterationResponse = await generateFileSelectionProcessingResponse(messages, filesToInspect, iterationCount, llm); logger.info(response); for (const ignored of response.ignoreFiles ?? []) ignoredFiles.add(ignored); for (const kept of response.keepFiles ?? []) keptFiles.add(kept); + // Create the user message with the additional file contents to inspect messages.push(await processedIterativeStepUserPrompt(response)); + // Don't cache the final result as it would only potentially be used once when generating a query answer const cache = filesToInspect.length ? 'ephemeral' : undefined; messages.push({ @@ -254,13 +268,23 @@ async function selectFilesCore( // Max of 4 cache tags with Anthropic. Clear the first one after the cached system prompt const cachedMessages = messages.filter((msg) => msg.cache === 'ephemeral'); if (cachedMessages.length > 4) { - logger.info('Removing cache tag'); cachedMessages[1].cache = undefined; } filesToInspect = response.inspectFiles; - // TODO if keepFiles and ignoreFiles doesnt have all of the files in filesToInspect, then + // We start the file selection process with the medium agent for speed/cost. + // Once the medium LLM has completed, then we switch to the hard LLM as a review, + // which may continue inspecting files until it is satisfied. 
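+	// The hard LLM reviews the same conversation state and may still request more files to inspect;
+	// the loop only exits once the hard LLM also returns no further inspectFiles (maxIterations still bounds the combined passes).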
+ if (!filesToInspect || filesToInspect.length === 0) { + if (llm === llms().medium) { + llm = llms().hard; + } else { + break; + } + } + + // TODO if keepFiles and ignoreFiles doesnt have all of the files in filesToInspect, then get the LLM to try again // filesToInspect = filesToInspect.filter((path) => !keptFiles.has(path) && !ignoredFiles.has(path)); } diff --git a/src/swe/discovery/selectFilesToEdit.ts b/src/swe/discovery/selectFilesToEdit.ts index a27fc819..ab23b3f7 100644 --- a/src/swe/discovery/selectFilesToEdit.ts +++ b/src/swe/discovery/selectFilesToEdit.ts @@ -3,8 +3,8 @@ import path from 'path'; import { createByModelName } from '@microsoft/tiktokenizer'; import { getFileSystem, llms } from '#agent/agentContextLocalStorage'; import { logger } from '#o11y/logger'; -import { getRepositoryOverview } from '#swe/repoIndexDocBuilder'; -import { RepositoryMaps, generateRepositoryMaps } from '#swe/repositoryMap'; +import { getRepositoryOverview } from '#swe/index/repoIndexDocBuilder'; +import { RepositoryMaps, generateRepositoryMaps } from '#swe/index/repositoryMap'; import { ProjectInfo, getProjectInfo } from '../projectDetection'; export interface SelectFilesResponse { diff --git a/src/swe/repoIndexDocBuilder.ts b/src/swe/index/repoIndexDocBuilder.ts similarity index 99% rename from src/swe/repoIndexDocBuilder.ts rename to src/swe/index/repoIndexDocBuilder.ts index 4899ba51..6eaa2ccd 100644 --- a/src/swe/repoIndexDocBuilder.ts +++ b/src/swe/index/repoIndexDocBuilder.ts @@ -6,7 +6,7 @@ import { getFileSystem, llms } from '#agent/agentContextLocalStorage'; import { logger } from '#o11y/logger'; import { withActiveSpan } from '#o11y/trace'; import { errorToString } from '#utils/errors'; -import { sophiaDirName } from '../appVars'; +import { sophiaDirName } from '../../appVars'; /** * This module builds summary documentation for a project/repository, to assist with searching in the repository. @@ -569,7 +569,7 @@ export async function loadBuildDocsSummaries(createIfNotExits = false): Promise< const fss = getFileSystem(); // If in a git repo use the repo root to store the summary index files - const repoFolder = (await fss.getGitRoot()) ?? fss.getWorkingDirectory(); + const repoFolder = (await fss.getVcsRoot()) ?? fss.getWorkingDirectory(); const docsDir = join(repoFolder, sophiaDirName, 'docs'); logger.info(`Load summaries from ${docsDir}`); diff --git a/src/swe/repositoryMap.ts b/src/swe/index/repositoryMap.ts similarity index 100% rename from src/swe/repositoryMap.ts rename to src/swe/index/repositoryMap.ts diff --git a/src/swe/lang/nodejs/typescriptTools.ts b/src/swe/lang/nodejs/typescriptTools.ts index 3a713b58..549f7cb5 100644 --- a/src/swe/lang/nodejs/typescriptTools.ts +++ b/src/swe/lang/nodejs/typescriptTools.ts @@ -37,7 +37,7 @@ export class TypescriptTools implements LanguageTools { // Note that the project needs to be in a compilable state otherwise this will fail logger.info('Generating TypeScript project map'); const fss = getFileSystem(); - const rootFolder = (await fss.getGitRoot()) ?? fss.getWorkingDirectory(); + const rootFolder = (await fss.getVcsRoot()) ?? 
fss.getWorkingDirectory(); const dtsFolder = join(rootFolder, sophiaDirName, 'dts'); await promisify(fs.mkdir)(dtsFolder, { recursive: true }); const tsConfigExists = await fss.fileExists('tsconfig.json'); diff --git a/src/swe/projectDetection.ts b/src/swe/projectDetection.ts index e79a82d1..d6330a7e 100644 --- a/src/swe/projectDetection.ts +++ b/src/swe/projectDetection.ts @@ -1,4 +1,4 @@ -import { existsSync, readFileSync } from 'fs'; +import { existsSync, readFileSync, writeFileSync } from 'fs'; import path, { join } from 'path'; import { getFileSystem, llms } from '#agent/agentContextLocalStorage'; import { logger } from '#o11y/logger'; @@ -87,12 +87,12 @@ export async function detectProjectInfo(requirements?: string): Promise ${requirements ? `\n${requirements}\n\n` : ''} -${files.join('\n')} +${tree} You task it to detect key information (language/runtime and build/test commands) for a software project from the names of the files contained within it${ requirements ? ' and the ' : '' diff --git a/src/user/user.ts b/src/user/user.ts index e74e9295..0b6012c8 100644 --- a/src/user/user.ts +++ b/src/user/user.ts @@ -3,13 +3,14 @@ export interface LLMServicesConfig { vertexRegion?: string; anthropicKey?: string; - openaiKey?: string; - groqKey?: string; - togetheraiKey?: string; - deepseekKey?: string; - fireworksKey?: string; cerebrasKey?: string; deepinfraKey?: string; + deepseekKey?: string; + fireworksKey?: string; + groqKey?: string; + nebiusKey?: string; + openaiKey?: string; + togetheraiKey?: string; xaiKey?: string; } diff --git a/src/utils/exec.ts b/src/utils/exec.ts index 0180dfdf..dfaed827 100644 --- a/src/utils/exec.ts +++ b/src/utils/exec.ts @@ -1,5 +1,5 @@ -import { ExecException, SpawnOptionsWithoutStdio, exec, spawn } from 'child_process'; -import { existsSync } from 'fs'; +import { ExecException, ExecSyncOptions, SpawnOptionsWithoutStdio, exec, execSync, spawn } from 'child_process'; +import { existsSync, readFileSync } from 'fs'; import { ExecOptions } from 'node:child_process'; import os from 'os'; import path from 'path'; @@ -23,6 +23,58 @@ export function checkExecResult(result: ExecResults, message: string) { } } +function getAvailableShell(): string { + const possibleShells = ['/bin/zsh', '/usr/bin/zsh', '/bin/bash', '/usr/bin/bash', '/bin/sh', '/usr/bin/sh']; + for (const shellPath of possibleShells) { + if (existsSync(shellPath)) { + return shellPath; + } + } + if (process.env.SHELL && existsSync(process.env.SHELL)) { + return process.env.SHELL; + } + throw new Error('No suitable shell found for executing commands.'); +} + +export function execCmdSync(command: string, cwd = getFileSystem().getWorkingDirectory()): ExecResults { + const home = process.env.HOME; + + if (command.startsWith('~') && home) command = home + command.substring(1); + try { + const shell = getAvailableShell(); + logger.info(`execCmdSync ${command}\ncwd: ${cwd}\nshell: ${shell}`); + + const options: ExecSyncOptions = { + cwd, + shell, + encoding: 'utf8', + env: { ...process.env, PATH: `${process.env.PATH}:/bin:/usr/bin` }, + }; + + let stdout = execSync(command, options); + if (typeof stdout !== 'string') stdout = stdout.toString(); + + logger.info(stdout); + + return { + cmd: command, + stdout, + stderr: '', + error: null, + cwd, + }; + } catch (error) { + logger.error('Error executing command:', error); + return { + cmd: command, + stdout: error.stdout?.toString() || '', + stderr: error.stderr?.toString() || '', + error, + cwd, + }; + } +} + export interface ExecResults { cmd: string; stdout: 
string; @@ -36,12 +88,12 @@ export interface ExecResults { * @param cwd current working directory * @returns */ -export async function execCmd(command: string, cwd = ''): Promise { +export async function execCmd(command: string, cwd = getFileSystem().getWorkingDirectory()): Promise { return withSpan('execCmd', async (span) => { const home = process.env.HOME; logger.info(`execCmd ${home ? command.replace(home, '~') : command} ${cwd}`); - // Need the right shell so git commands work (by having the SSH keys) - const shell = os.platform() === 'darwin' ? '/bin/zsh' : '/bin/bash'; + // Use the available shell + const shell = getAvailableShell(); const result = await new Promise((resolve, reject) => { exec(command, { cwd, shell }, (error, stdout, stderr) => { resolve({ @@ -99,7 +151,7 @@ export interface ExecCmdOptions { export async function execCommand(command: string, opts?: ExecCmdOptions): Promise { return withSpan('execCommand', async (span) => { - const shell = os.platform() === 'darwin' ? '/bin/zsh' : '/bin/bash'; + const shell = getAvailableShell(); const env = opts?.envVars ? { ...process.env, ...opts.envVars } : process.env; const options: ExecOptions = { cwd: opts?.workingDirectory ?? getFileSystem().getWorkingDirectory(), shell, env }; @@ -140,7 +192,7 @@ export async function execCommand(command: string, opts?: ExecCmdOptions): Promi export async function spawnCommand(command: string, workingDirectory?: string): Promise { return withSpan('spawnCommand', async (span) => { - const shell = os.platform() === 'darwin' ? '/bin/zsh' : '/bin/bash'; + const shell = getAvailableShell(); const cwd = workingDirectory ?? getFileSystem().getWorkingDirectory(); const options: SpawnOptionsWithoutStdio = { cwd, shell, env: process.env }; try { diff --git a/tsconfig.json b/tsconfig.json index 18589aaf..0cd2e1d8 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -8,7 +8,7 @@ "compilerOptions": { "lib": ["es2023"], "module": "node16", - "target": "es2022", + "target": "es2023", "outDir": "dist", "noImplicitAny": false, "strictNullChecks": false, diff --git a/variables/local.env.example b/variables/local.env.example index c2bb3756..6786804d 100644 --- a/variables/local.env.example +++ b/variables/local.env.example @@ -37,6 +37,7 @@ DEEPINFRA_API_KEY= CEREBRAS_API_KEY= XAI_API_KEY= DEEPINFRA_API_KEY= +NEBIUS_API_KEY= #OLLAMA_API_URL=http://localhost:11434 GITLAB_TOKEN=
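
Throughout these service files the per-character cost closures are replaced by `perMilTokens(...)` and the `InputCostFunction`/`OutputCostFunction` types imported from `#llm/base-llm`, a file this diff does not include. A minimal sketch of their presumed shape, inferred from the call sites and from the `inputCost()` helper added to `openai.ts`; the real definitions may differ:

```typescript
// Sketch only: these types and perMilTokens live in src/llm/base-llm.ts, which is not part of this diff.
// The shapes are inferred from call sites such as perMilTokens(0.59) and the inputCost() helper in openai.ts.
export type InputCostFunction = (input: string, tokens: number, providerMetadata?: any) => number;
export type OutputCostFunction = (output: string, tokens: number) => number;

// $X per million tokens, priced on actual token counts rather than the old input.length / 4 estimate.
export function perMilTokens(dollarsPerMillionTokens: number): InputCostFunction & OutputCostFunction {
	return (_text: string, tokens: number) => (tokens * dollarsPerMillionTokens) / 1_000_000;
}

// Worked example of the inputCost() helper added in openai.ts, for GPT4o (inputCost(2.5)):
// a 10_000-token prompt with 8_000 tokens served from the prompt cache costs
//   (10_000 - 8_000) * 2.5 / 1_000_000 = $0.005   (uncached tokens at full price)
// +  8_000 * 2.5 / 2 / 1_000_000       = $0.010   (cached tokens at half price)
//                                      = $0.015
```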
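
The new Nebius service registers DeepSeek R1 under the key `nebius:deepseek-ai/DeepSeek-R1` and reads its key from the user's `llmConfig.nebiusKey` or the `NEBIUS_API_KEY` environment variable added to `variables/local.env.example`. A minimal usage sketch, assuming the `generateText` call shape used in the integration test above:

```typescript
// Usage sketch mirroring the Nebius integration test added in src/llm/services/llm.int.ts.
// Assumes NEBIUS_API_KEY is set (see variables/local.env.example).
import { nebiusDeepSeekR1 } from '#llm/services/nebius';

async function example(): Promise<void> {
	const llm = nebiusDeepSeekR1(); // DeepSeek R1 on Nebius, priced at $0.80/M input and $2.40/M output tokens
	const answer = await llm.generateText('In one word, what colour is the sky on a clear day?', { temperature: 0 });
	console.log(answer);
}
```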
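
The `exec.ts` changes replace the hard-coded macOS/Linux shell selection with `getAvailableShell()` and add a synchronous `execCmdSync` helper. A short usage sketch based on the `ExecResults` shape shown in the diff, where `error` is null on success:

```typescript
// Usage sketch for the new execCmdSync() helper added in src/utils/exec.ts.
import { execCmdSync } from '#utils/exec';

const result = execCmdSync('git status --porcelain');
if (result.error) {
	console.error(`Command failed in ${result.cwd}: ${result.stderr}`);
} else {
	console.log(result.stdout);
}
```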