Skip to content

Commit

Permalink
feat: add hard token limits to tool calls and chat history to prevent context limit overflow
Browse files Browse the repository at this point in the history

- improve system prompt for better description of memory and scratch space
  • Loading branch information
erik-balfe committed Nov 12, 2024
1 parent f7fd347 commit 13a7202
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/ai/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ async function executeTask(
if (parsedResponse.usage) {
const costInfo = countUsageCost(parsedResponse.usage, model);
totalCost += costInfo.costUSD;
logger.debug(
logger.info(
`Step cost: $${costInfo.costUSD.toFixed(6)}, Total cost so far: $${totalCost.toFixed(6)}`,
);
}
Expand Down
53 changes: 50 additions & 3 deletions src/ai/chatHistory.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { MetadataMode, VectorStoreIndex } from "llamaindex";
import { addConversationDocument, initializeVectorStoreIndex } from "../ai/retrieval/vectorStore";
import { CONTEXT_ALLOCATION, ContextAllocationItem } from "../constants";
import { ConversationMetadata, Database, getAllConversationData } from "../utils/database";
import { logger } from "../utils/logger";
import { strigifyFullConversation } from "../utils/strigifyFullConversation";
Expand Down Expand Up @@ -103,10 +104,56 @@ export async function getStoredConversationDataStrins(

if (!relevantContext.length) {
logger.debug("No relevant context found for the current query");
// return null;
}

const memories = relevantContext.map((item) => `<memory>${item}</memory>`).join("\n\n");
// Handle memories truncation
const memoriesRaw = relevantContext.map((item) => `<memory>${item}</memory>`).join("\n\n");
const { truncatedText: memoriesTruncated, wasLimit: memoriesLimit } = truncateText(
memoriesRaw,
CONTEXT_ALLOCATION.memories,
);

if (memoriesLimit) {
logger.warn(memoriesLimit);
}

// Handle chat history truncation
const { truncatedText: chatHistoryTruncated, wasLimit: chatHistoryLimit } = truncateText(
stringifiedConversation,
CONTEXT_ALLOCATION.chatHistory,
);

if (chatHistoryLimit) {
logger.warn(chatHistoryLimit);
}

return { memories, chatHistory: stringifiedConversation };
return {
memories: memoriesTruncated,
chatHistory: chatHistoryTruncated,
};
}

/**
 * Truncate `text` so the result fits within `limits.maxChars`.
 *
 * When truncation is needed, the beginning and end of the text are kept and
 * the removed middle is replaced with a visible `[TRUNCATED]` marker. More is
 * kept from the end (70%) than the beginning, because for chat history the
 * most recent messages are the most relevant.
 *
 * @param text   The text to limit.
 * @param limits Context budget; only `maxChars` is used here.
 * @returns `truncatedText` (always <= maxChars characters) and, when the text
 *          was cut, a human-readable `wasLimit` message suitable for logging.
 */
function truncateText(
  text: string,
  limits: ContextAllocationItem,
): {
  truncatedText: string;
  wasLimit?: string;
} {
  const { maxChars } = limits;

  if (text.length <= maxChars) {
    return { truncatedText: text };
  }

  const marker = "\n...[TRUNCATED]...\n";
  // Reserve room for the marker so the final string never exceeds maxChars
  // (the previous version kept maxChars of text AND appended the marker,
  // overshooting the hard limit it was meant to enforce).
  const budget = maxChars - marker.length;

  let truncated: string;
  if (budget <= 0) {
    // Limit too small to fit the marker at all; hard-cut instead.
    truncated = text.slice(0, maxChars);
  } else {
    // 70% from the end, remainder from the start. Math.ceil guarantees
    // endChars >= 1, so slice(-endChars) never degenerates to the full string.
    const endChars = Math.ceil(budget * 0.7);
    const startChars = budget - endChars;
    truncated = `${text.slice(0, startChars)}${marker}${text.slice(-endChars)}`;
  }

  return {
    truncatedText: truncated,
    wasLimit: `Text was truncated from ${text.length} to ${truncated.length} characters`,
  };
}
40 changes: 35 additions & 5 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export const AI_CONSOLE_AGENT_DIR = "/tmp/ai-console-agent";
export const CURRENT_RUN_FILE_NAME = "current_run_id.txt";

export const MAX_INPUT_LENGTH = 10000;
export const AGENT_CONTEXT_ALLOCATION = "60000"; // New constant for agent context allocation
export const AGENT_CONTEXT_ALLOCATION = "60000";

export const CONFIG_DIR_PATH = path.join(
process.env.HOME || process.env.USERPROFILE || "",
Expand Down Expand Up @@ -83,7 +83,7 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: true,
functionCalling: true,
knowledgeCutoff: "08.08.2024",
default: false, // added field
default: false,
},
"gpt-4o": {
id: "gpt-4o",
Expand All @@ -97,7 +97,7 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: true,
functionCalling: true,
knowledgeCutoff: "8.08.2024",
default: false, // added field
default: false,
},
"claude-3-5-sonnet-latest": {
id: "claude-3-5-sonnet-latest",
Expand All @@ -111,7 +111,7 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: true,
functionCalling: true,
knowledgeCutoff: "April 2024",
default: false, // added field
default: false,
},
"claude-3-5-haiku-latest": {
id: "claude-3-5-haiku-latest",
Expand Down Expand Up @@ -139,6 +139,36 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: false,
functionCalling: true,
knowledgeCutoff: "Unknown",
default: false, // added field
default: false,
},
};

/** Limits for a single slice of the model's context window. */
export interface ContextAllocationItem {
  /** Hard cap in tokens for this slice. */
  maxTokens: number;
  /** Assumed average characters per token, used to derive maxChars. */
  assumedTokenSize: number;
  /** Hard cap in characters (maxTokens * assumedTokenSize). */
  maxChars: number;
}

/** Per-purpose context budgets: retrieved memories, chat history, tool output. */
export interface ContextAllocation {
  memories: ContextAllocationItem;
  chatHistory: ContextAllocationItem;
  toolOutput: ContextAllocationItem;
}

// Annotated with ContextAllocation so a missing slice or misnamed field is a
// compile-time error instead of a silent structural mismatch at the call site.
export const CONTEXT_ALLOCATION: ContextAllocation = {
  memories: {
    maxTokens: 10000,
    assumedTokenSize: 4,
    maxChars: 40000,
  },
  chatHistory: {
    maxTokens: 15000,
    assumedTokenSize: 4,
    maxChars: 60000,
  },
  toolOutput: {
    maxTokens: 5000,
    assumedTokenSize: 4,
    maxChars: 20000,
  },
};
50 changes: 43 additions & 7 deletions src/tools/executeCommand.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import chalk from "chalk";
import { FunctionTool } from "llamaindex";
import { displayOptionsAndGetInput } from "../cli/interface";
import { CONTEXT_ALLOCATION, ContextAllocationItem } from "../constants";
import { Database } from "../utils/database";
import { logger } from "../utils/logger";
import { runShellCommand } from "../utils/runShellCommand";
Expand Down Expand Up @@ -28,12 +29,25 @@ const executeCommandCallback = async (params: ExecuteCommandParams): Promise<str
try {
const { stdout, stderr } = await runShellCommand(command, { shell: "bash" });
logger.info(`Agent run command:\n$${command}\n`);

const output = stderr || stdout;
logger.debug("output:", output);
const shortOutput = formatShortOutput(output);
logger.debug("shortOutput:", shortOutput);
logger.info(`Agent got command output:\n${shortOutput}\n`);
return JSON.stringify({ stdout, stderr });
const { truncatedOutput, wasLimit } = truncateCommandOutput(output, CONTEXT_ALLOCATION.toolOutput);

if (wasLimit) {
logger.warn(wasLimit);
}

logger.debug("truncated output:", truncatedOutput);
logger.info(`Agent got command output:\n${truncatedOutput}\n`);
return JSON.stringify({
stdout: truncatedOutput,
stderr,
...(wasLimit && {
truncated: true,
originalLength: output.length,
truncatedLength: truncatedOutput.length,
}),
});
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
return `Failed to execute command: ${errorMessage}`;
Expand All @@ -45,8 +59,9 @@ export function createExecuteCommandTool(db: Database, conversationId: number) {

return new FunctionTool<ExecuteCommandParams, Promise<string>>(wrappedCallback, {
name: "executeCommand",
description:
"Execute a shell command on the user's host system. Commands that suppose to be interactive (like usual 'git commit') are not supported and must be strongly avoided.",
description: `Execute a shell command on the user's host system. Commands that suppose to be interactive (like usual 'git commit') are not supported and must be strongly avoided.
Note: Command output is limited to ${CONTEXT_ALLOCATION.toolOutput.maxTokens} tokens (approximately ${CONTEXT_ALLOCATION.toolOutput.maxChars} characters).
For commands that might produce large output, consider using more specific commands or adding filters (grep, head, tail, etc.).`,
parameters: {
type: "object",
properties: {
Expand Down Expand Up @@ -80,3 +95,24 @@ export function formatShortOutput(longText: string, numLines: number = 2, assume
return `${longText.slice(0, assumedLineLength)}...${longText.slice(-assumedLineLength)}`;
}
}

/**
 * Truncate a shell command's output so it fits within the tool-output context
 * budget (`limits.maxChars`).
 *
 * Keeps the head and tail of the output around a visible `[TRUNCATED]` marker,
 * using the full character budget. The previous implementation delegated to
 * `formatShortOutput(output, 2, maxChars / 4)`, which kept only a quarter of
 * the budget from each end (~maxChars/2 total), silently wasting half the
 * allowance, and passed a possibly fractional slice length.
 *
 * @param output The raw command output.
 * @param limits Context budget; only `maxChars` is used here.
 * @returns `truncatedOutput` (always <= maxChars characters) and, when the
 *          output was cut, a human-readable `wasLimit` message for logging.
 */
function truncateCommandOutput(
  output: string,
  limits: ContextAllocationItem,
): {
  truncatedOutput: string;
  wasLimit?: string;
} {
  const { maxChars } = limits;

  if (output.length <= maxChars) {
    return { truncatedOutput: output };
  }

  const marker = "\n...[TRUNCATED]...\n";
  // Reserve room for the marker inside the budget so the result never
  // exceeds maxChars. If the budget cannot even fit the marker, hard-cut.
  const budget = maxChars - marker.length;

  let truncated: string;
  if (budget <= 0) {
    truncated = output.slice(0, maxChars);
  } else {
    // Split the budget roughly in half; Math.ceil guarantees tailChars >= 1,
    // so slice(-tailChars) never degenerates to the full string.
    const tailChars = Math.ceil(budget / 2);
    const headChars = budget - tailChars;
    truncated = `${output.slice(0, headChars)}${marker}${output.slice(-tailChars)}`;
  }

  return {
    truncatedOutput: truncated,
    wasLimit: `Output was truncated from ${output.length} to ${truncated.length} characters`,
  };
}

0 comments on commit 13a7202

Please sign in to comment.