Skip to content

Commit

Permalink
feat: add hard token limits to tool calls and chat history to prevent context limit overflow
Browse files Browse the repository at this point in the history

- improve system prompt for better description of memory and scratch space
  • Loading branch information
erik-balfe committed Nov 12, 2024
1 parent f7fd347 commit 13a7202
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/ai/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ async function executeTask(
if (parsedResponse.usage) {
const costInfo = countUsageCost(parsedResponse.usage, model);
totalCost += costInfo.costUSD;
logger.debug(
logger.info(
`Step cost: $${costInfo.costUSD.toFixed(6)}, Total cost so far: $${totalCost.toFixed(6)}`,
);
}
Expand Down
53 changes: 50 additions & 3 deletions src/ai/chatHistory.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { MetadataMode, VectorStoreIndex } from "llamaindex";
import { addConversationDocument, initializeVectorStoreIndex } from "../ai/retrieval/vectorStore";
import { CONTEXT_ALLOCATION, ContextAllocationItem } from "../constants";
import { ConversationMetadata, Database, getAllConversationData } from "../utils/database";
import { logger } from "../utils/logger";
import { strigifyFullConversation } from "../utils/strigifyFullConversation";
Expand Down Expand Up @@ -103,10 +104,56 @@ export async function getStoredConversationDataStrins(

if (!relevantContext.length) {
logger.debug("No relevant context found for the current query");
// return null;
}

const memories = relevantContext.map((item) => `<memory>${item}</memory>`).join("\n\n");
// Handle memories truncation
const memoriesRaw = relevantContext.map((item) => `<memory>${item}</memory>`).join("\n\n");
const { truncatedText: memoriesTruncated, wasLimit: memoriesLimit } = truncateText(
memoriesRaw,
CONTEXT_ALLOCATION.memories,
);

if (memoriesLimit) {
logger.warn(memoriesLimit);
}

// Handle chat history truncation
const { truncatedText: chatHistoryTruncated, wasLimit: chatHistoryLimit } = truncateText(
stringifiedConversation,
CONTEXT_ALLOCATION.chatHistory,
);

if (chatHistoryLimit) {
logger.warn(chatHistoryLimit);
}

return { memories, chatHistory: stringifiedConversation };
return {
memories: memoriesTruncated,
chatHistory: chatHistoryTruncated,
};
}

/**
 * Truncate `text` so the result fits within `limits.maxChars`.
 *
 * When truncation is needed, the beginning and end of the text are kept and
 * the removed middle is replaced with a visible `[TRUNCATED]` marker. More is
 * kept from the end (70%) than the beginning, because for chat history the
 * most recent messages are the most relevant.
 *
 * @param text   The text to limit.
 * @param limits Context budget; only `maxChars` is used here.
 * @returns `truncatedText` (always <= maxChars characters) and, when the text
 *          was cut, a human-readable `wasLimit` message suitable for logging.
 */
function truncateText(
  text: string,
  limits: ContextAllocationItem,
): {
  truncatedText: string;
  wasLimit?: string;
} {
  const { maxChars } = limits;

  if (text.length <= maxChars) {
    return { truncatedText: text };
  }

  const marker = "\n...[TRUNCATED]...\n";
  // Reserve room for the marker so the final string never exceeds maxChars
  // (the previous version kept maxChars of text AND appended the marker,
  // overshooting the hard limit it was meant to enforce).
  const budget = maxChars - marker.length;

  let truncated: string;
  if (budget <= 0) {
    // Limit too small to fit the marker at all; hard-cut instead.
    truncated = text.slice(0, maxChars);
  } else {
    // 70% from the end, remainder from the start. Math.ceil guarantees
    // endChars >= 1, so slice(-endChars) never degenerates to the full string.
    const endChars = Math.ceil(budget * 0.7);
    const startChars = budget - endChars;
    truncated = `${text.slice(0, startChars)}${marker}${text.slice(-endChars)}`;
  }

  return {
    truncatedText: truncated,
    wasLimit: `Text was truncated from ${text.length} to ${truncated.length} characters`,
  };
}
40 changes: 35 additions & 5 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export const AI_CONSOLE_AGENT_DIR = "/tmp/ai-console-agent";
export const CURRENT_RUN_FILE_NAME = "current_run_id.txt";

export const MAX_INPUT_LENGTH = 10000;
export const AGENT_CONTEXT_ALLOCATION = "60000"; // New constant for agent context allocation
export const AGENT_CONTEXT_ALLOCATION = "60000";

export const CONFIG_DIR_PATH = path.join(
process.env.HOME || process.env.USERPROFILE || "",
Expand Down Expand Up @@ -83,7 +83,7 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: true,
functionCalling: true,
knowledgeCutoff: "08.08.2024",
default: false, // added field
default: false,
},
"gpt-4o": {
id: "gpt-4o",
Expand All @@ -97,7 +97,7 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: true,
functionCalling: true,
knowledgeCutoff: "8.08.2024",
default: false, // added field
default: false,
},
"claude-3-5-sonnet-latest": {
id: "claude-3-5-sonnet-latest",
Expand All @@ -111,7 +111,7 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: true,
functionCalling: true,
knowledgeCutoff: "April 2024",
default: false, // added field
default: false,
},
"claude-3-5-haiku-latest": {
id: "claude-3-5-haiku-latest",
Expand Down Expand Up @@ -139,6 +139,36 @@ export const MODEL_PRICES: Record<string, AIModelConfig> = {
supportsVision: false,
functionCalling: true,
knowledgeCutoff: "Unknown",
default: false, // added field
default: false,
},
};

/** Limits for a single slice of the model's context window. */
export interface ContextAllocationItem {
  /** Hard cap in tokens for this slice. */
  maxTokens: number;
  /** Assumed average characters per token, used to derive maxChars. */
  assumedTokenSize: number;
  /** Hard cap in characters (maxTokens * assumedTokenSize). */
  maxChars: number;
}

/** Per-purpose context budgets: retrieved memories, chat history, tool output. */
export interface ContextAllocation {
  memories: ContextAllocationItem;
  chatHistory: ContextAllocationItem;
  toolOutput: ContextAllocationItem;
}

// Annotated with ContextAllocation so a missing slice or misnamed field is a
// compile-time error instead of a silent structural mismatch at the call site.
export const CONTEXT_ALLOCATION: ContextAllocation = {
  memories: {
    maxTokens: 10000,
    assumedTokenSize: 4,
    maxChars: 40000,
  },
  chatHistory: {
    maxTokens: 15000,
    assumedTokenSize: 4,
    maxChars: 60000,
  },
  toolOutput: {
    maxTokens: 5000,
    assumedTokenSize: 4,
    maxChars: 20000,
  },
};
50 changes: 43 additions & 7 deletions src/tools/executeCommand.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import chalk from "chalk";
import { FunctionTool } from "llamaindex";
import { displayOptionsAndGetInput } from "../cli/interface";
import { CONTEXT_ALLOCATION, ContextAllocationItem } from "../constants";
import { Database } from "../utils/database";
import { logger } from "../utils/logger";
import { runShellCommand } from "../utils/runShellCommand";
Expand Down Expand Up @@ -28,12 +29,25 @@ const executeCommandCallback = async (params: ExecuteCommandParams): Promise<str
try {
const { stdout, stderr } = await runShellCommand(command, { shell: "bash" });
logger.info(`Agent run command:\n$${command}\n`);

const output = stderr || stdout;
logger.debug("output:", output);
const shortOutput = formatShortOutput(output);
logger.debug("shortOutput:", shortOutput);
logger.info(`Agent got command output:\n${shortOutput}\n`);
return JSON.stringify({ stdout, stderr });
const { truncatedOutput, wasLimit } = truncateCommandOutput(output, CONTEXT_ALLOCATION.toolOutput);

if (wasLimit) {
logger.warn(wasLimit);
}

logger.debug("truncated output:", truncatedOutput);
logger.info(`Agent got command output:\n${truncatedOutput}\n`);
return JSON.stringify({
stdout: truncatedOutput,
stderr,
...(wasLimit && {
truncated: true,
originalLength: output.length,
truncatedLength: truncatedOutput.length,
}),
});
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
return `Failed to execute command: ${errorMessage}`;
Expand All @@ -45,8 +59,9 @@ export function createExecuteCommandTool(db: Database, conversationId: number) {

return new FunctionTool<ExecuteCommandParams, Promise<string>>(wrappedCallback, {
name: "executeCommand",
description:
"Execute a shell command on the user's host system. Commands that suppose to be interactive (like usual 'git commit') are not supported and must be strongly avoided.",
description: `Execute a shell command on the user's host system. Commands that suppose to be interactive (like usual 'git commit') are not supported and must be strongly avoided.
Note: Command output is limited to ${CONTEXT_ALLOCATION.toolOutput.maxTokens} tokens (approximately ${CONTEXT_ALLOCATION.toolOutput.maxChars} characters).
For commands that might produce large output, consider using more specific commands or adding filters (grep, head, tail, etc.).`,
parameters: {
type: "object",
properties: {
Expand Down Expand Up @@ -80,3 +95,24 @@ export function formatShortOutput(longText: string, numLines: number = 2, assume
return `${longText.slice(0, assumedLineLength)}...${longText.slice(-assumedLineLength)}`;
}
}

/**
 * Truncate a shell command's output so it fits within the tool-output context
 * budget (`limits.maxChars`).
 *
 * Keeps the head and tail of the output around a visible `[TRUNCATED]` marker,
 * using the full character budget. The previous implementation delegated to
 * `formatShortOutput(output, 2, maxChars / 4)`, which kept only a quarter of
 * the budget from each end (~maxChars/2 total), silently wasting half the
 * allowance, and passed a possibly fractional slice length.
 *
 * @param output The raw command output.
 * @param limits Context budget; only `maxChars` is used here.
 * @returns `truncatedOutput` (always <= maxChars characters) and, when the
 *          output was cut, a human-readable `wasLimit` message for logging.
 */
function truncateCommandOutput(
  output: string,
  limits: ContextAllocationItem,
): {
  truncatedOutput: string;
  wasLimit?: string;
} {
  const { maxChars } = limits;

  if (output.length <= maxChars) {
    return { truncatedOutput: output };
  }

  const marker = "\n...[TRUNCATED]...\n";
  // Reserve room for the marker inside the budget so the result never
  // exceeds maxChars. If the budget cannot even fit the marker, hard-cut.
  const budget = maxChars - marker.length;

  let truncated: string;
  if (budget <= 0) {
    truncated = output.slice(0, maxChars);
  } else {
    // Split the budget roughly in half; Math.ceil guarantees tailChars >= 1,
    // so slice(-tailChars) never degenerates to the full string.
    const tailChars = Math.ceil(budget / 2);
    const headChars = budget - tailChars;
    truncated = `${output.slice(0, headChars)}${marker}${output.slice(-tailChars)}`;
  }

  return {
    truncatedOutput: truncated,
    wasLimit: `Output was truncated from ${output.length} to ${truncated.length} characters`,
  };
}

0 comments on commit 13a7202

Please sign in to comment.