Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ feat: Anthropic Agents Prompt Caching & UI Accessibility Enhancements #6045

Merged
merged 9 commits into from
Feb 26, 2025
92 changes: 14 additions & 78 deletions api/app/clients/AnthropicClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ const {
parseParamFromPrompt,
createContextHandlers,
} = require('./prompts');
const {
getClaudeHeaders,
configureReasoning,
checkPromptCacheSupport,
} = require('~/server/services/Endpoints/anthropic/helpers');
const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
const Tokenizer = require('~/server/services/Tokenizer');
Expand Down Expand Up @@ -101,8 +106,7 @@ class AnthropicClient extends BaseClient {
const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
this.isClaude3 = modelMatch.includes('claude-3');
this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
this.supportsCacheControl =
this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

if (
this.isLegacyOutput &&
Expand Down Expand Up @@ -174,26 +178,9 @@ class AnthropicClient extends BaseClient {
options.baseURL = this.options.reverseProxyUrl;
}

if (
this.supportsCacheControl &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-5-sonnet')
) {
options.defaultHeaders = {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
};
} else if (
this.supportsCacheControl &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-7')
) {
options.defaultHeaders = {
'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
};
} else if (this.supportsCacheControl) {
options.defaultHeaders = {
'anthropic-beta': 'prompt-caching-2024-07-31',
};
const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
if (headers) {
options.defaultHeaders = headers;
}

return new Anthropic(options);
Expand Down Expand Up @@ -684,27 +671,6 @@ class AnthropicClient extends BaseClient {
: await client.completions.create(options);
}

/**
* @param {string} modelName
* @returns {boolean}
*/
checkPromptCacheSupport(modelName) {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
if (modelMatch.includes('claude-3-5-sonnet-latest')) {
return false;
}
if (
modelMatch === 'claude-3-7-sonnet' ||
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-5-haiku' ||
modelMatch === 'claude-3-haiku' ||
modelMatch === 'claude-3-opus'
) {
return true;
}
return false;
}

getMessageMapMethod() {
/**
* @param {TMessage} msg
Expand Down Expand Up @@ -761,7 +727,7 @@ class AnthropicClient extends BaseClient {
topK: top_k,
} = this.modelOptions;

const requestOptions = {
let requestOptions = {
model,
stream: stream || true,
stop_sequences,
Expand All @@ -780,40 +746,10 @@ class AnthropicClient extends BaseClient {
requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
}

if (
this.options.thinking &&
requestOptions?.model &&
requestOptions.model.includes('claude-3-7')
) {
requestOptions.thinking = {
type: 'enabled',
};
}
if (requestOptions.thinking != null && this.options.thinkingBudget != null) {
requestOptions.thinking = {
...requestOptions.thinking,
budget_tokens: this.options.thinkingBudget,
};
}
if (
requestOptions.thinking != null &&
(requestOptions.max_tokens == null ||
requestOptions.thinking.budget_tokens > requestOptions.max_tokens)
) {
const maxTokens = anthropicSettings.maxOutputTokens.reset(requestOptions.model);
requestOptions.max_tokens = requestOptions.max_tokens ?? maxTokens;

logger.warn(
requestOptions.max_tokens === maxTokens
? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
: `[AnthropicClient] thinking budget_tokens (${requestOptions.thinking.budget_tokens}) exceeds max_tokens (${requestOptions.max_tokens}). Adjusting budget_tokens.`,
);

requestOptions.thinking.budget_tokens = Math.min(
requestOptions.thinking.budget_tokens,
Math.floor(requestOptions.max_tokens * 0.9),
);
}
requestOptions = configureReasoning(requestOptions, {
thinking: this.options.thinking,
thinkingBudget: this.options.thinkingBudget,
});

if (this.systemMessage && this.supportsCacheControl === true) {
requestOptions.system = [
Expand Down
8 changes: 5 additions & 3 deletions api/app/clients/prompts/addCacheControl.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Anthropic API: Adds cache control to the appropriate user messages in the payload.
* @param {Array<AnthropicMessage>} messages - The array of message objects.
* @returns {Array<AnthropicMessage>} - The updated array of message objects with cache control added.
* @param {Array<AnthropicMessage | BaseMessage>} messages - The array of message objects.
* @returns {Array<AnthropicMessage | BaseMessage>} - The updated array of message objects with cache control added.
*/
function addCacheControl(messages) {
if (!Array.isArray(messages) || messages.length < 2) {
Expand All @@ -13,7 +13,9 @@ function addCacheControl(messages) {

for (let i = updatedMessages.length - 1; i >= 0 && userMessagesModified < 2; i--) {
const message = updatedMessages[i];
if (message.role !== 'user') {
if (message.getType != null && message.getType() !== 'human') {
continue;
} else if (message.getType == null && message.role !== 'user') {
continue;
}

Expand Down
8 changes: 4 additions & 4 deletions api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@
"@keyv/mongo": "^2.1.8",
"@keyv/redis": "^2.8.1",
"@langchain/community": "^0.3.14",
"@langchain/core": "^0.3.37",
"@langchain/google-genai": "^0.1.7",
"@langchain/google-vertexai": "^0.1.8",
"@langchain/core": "^0.3.40",
"@langchain/google-genai": "^0.1.9",
"@langchain/google-vertexai": "^0.2.0",
"@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^2.1.2",
"@librechat/agents": "^2.1.3",
"@waylaidwanderer/fetch-event-source": "^3.0.1",
"axios": "1.7.8",
"bcryptjs": "^2.4.3",
Expand Down
16 changes: 13 additions & 3 deletions api/server/controllers/agents/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const {
} = require('librechat-data-provider');
const {
formatMessage,
addCacheControl,
formatAgentMessages,
formatContentStrings,
createContextHandlers,
Expand Down Expand Up @@ -589,7 +590,7 @@ class AgentClient extends BaseClient {
* @param {number} [i]
* @param {TMessageContentParts[]} [contentData]
*/
const runAgent = async (agent, messages, i = 0, contentData = []) => {
const runAgent = async (agent, _messages, i = 0, contentData = []) => {
config.configurable.model = agent.model_parameters.model;
if (i > 0) {
this.model = agent.model_parameters.model;
Expand Down Expand Up @@ -622,12 +623,21 @@ class AgentClient extends BaseClient {
}

if (noSystemMessages === true && systemContent?.length) {
let latestMessage = messages.pop().content;
let latestMessage = _messages.pop().content;
if (typeof latestMessage !== 'string') {
latestMessage = latestMessage[0].text;
}
latestMessage = [systemContent, latestMessage].join('\n');
messages.push(new HumanMessage(latestMessage));
_messages.push(new HumanMessage(latestMessage));
}

let messages = _messages;
if (
agent.model_parameters?.clientOptions?.defaultHeaders?.['anthropic-beta']?.includes(
'prompt-caching',
)
) {
messages = addCacheControl(messages);
}

run = await createRun({
Expand Down
110 changes: 110 additions & 0 deletions api/server/services/Endpoints/anthropic/helpers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
const { EModelEndpoint, anthropicSettings } = require('librechat-data-provider');
const { matchModelName } = require('~/utils');
const { logger } = require('~/config');

/**
* @param {string} modelName
* @returns {boolean}
*/
function checkPromptCacheSupport(modelName) {
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
if (
modelMatch.includes('claude-3-5-sonnet-latest') ||
modelMatch.includes('claude-3.5-sonnet-latest')
) {
return false;
}

if (
modelMatch === 'claude-3-7-sonnet' ||
modelMatch === 'claude-3-5-sonnet' ||
modelMatch === 'claude-3-5-haiku' ||
modelMatch === 'claude-3-haiku' ||
modelMatch === 'claude-3-opus' ||
modelMatch === 'claude-3.7-sonnet' ||
modelMatch === 'claude-3.5-sonnet' ||
modelMatch === 'claude-3.5-haiku'
) {
return true;
}

return false;
}

/**
* Gets the appropriate headers for Claude models with cache control
* @param {string} model The model name
* @param {boolean} supportsCacheControl Whether the model supports cache control
* @returns {AnthropicClientOptions['extendedOptions']['defaultHeaders']|undefined} The headers object or undefined if not applicable
*/
function getClaudeHeaders(model, supportsCacheControl) {
if (!supportsCacheControl) {
return undefined;
}

if (/claude-3[-.]5-sonnet/.test(model)) {
return {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
};
} else if (/claude-3[-.]7/.test(model)) {
return {
'anthropic-beta': 'output-128k-2025-02-19,prompt-caching-2024-07-31',
};
} else {
return {
'anthropic-beta': 'prompt-caching-2024-07-31',
};
}
}

/**
* Configures reasoning-related options for Claude models
* @param {AnthropicClientOptions & { max_tokens?: number }} anthropicInput The request options object
* @param {Object} extendedOptions Additional client configuration options
* @param {boolean} extendedOptions.thinking Whether thinking is enabled in client config
* @param {number|null} extendedOptions.thinkingBudget The token budget for thinking
* @returns {Object} Updated request options
*/
function configureReasoning(anthropicInput, extendedOptions = {}) {
const updatedOptions = { ...anthropicInput };
const currentMaxTokens = updatedOptions.max_tokens ?? updatedOptions.maxTokens;
if (
extendedOptions.thinking &&
updatedOptions?.model &&
/claude-3[-.]7/.test(updatedOptions.model)
) {
updatedOptions.thinking = {
type: 'enabled',
};
}

if (updatedOptions.thinking != null && extendedOptions.thinkingBudget != null) {
updatedOptions.thinking = {
...updatedOptions.thinking,
budget_tokens: extendedOptions.thinkingBudget,
};
}

if (
updatedOptions.thinking != null &&
(currentMaxTokens == null || updatedOptions.thinking.budget_tokens > currentMaxTokens)
) {
const maxTokens = anthropicSettings.maxOutputTokens.reset(updatedOptions.model);
updatedOptions.max_tokens = currentMaxTokens ?? maxTokens;

logger.warn(
updatedOptions.max_tokens === maxTokens
? '[AnthropicClient] max_tokens is not defined while thinking is enabled. Setting max_tokens to model default.'
: `[AnthropicClient] thinking budget_tokens (${updatedOptions.thinking.budget_tokens}) exceeds max_tokens (${updatedOptions.max_tokens}). Adjusting budget_tokens.`,
);

updatedOptions.thinking.budget_tokens = Math.min(
updatedOptions.thinking.budget_tokens,
Math.floor(updatedOptions.max_tokens * 0.9),
);
}

return updatedOptions;
}

module.exports = { checkPromptCacheSupport, getClaudeHeaders, configureReasoning };
18 changes: 17 additions & 1 deletion api/server/services/Endpoints/anthropic/llm.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const { HttpsProxyAgent } = require('https-proxy-agent');
const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');

/**
* Generates configuration options for creating an Anthropic language model (LLM) instance.
Expand All @@ -20,6 +21,14 @@ const { anthropicSettings, removeNullishValues } = require('librechat-data-provi
* @returns {Object} Configuration options for creating an Anthropic LLM instance, with null and undefined values removed.
*/
function getLLMConfig(apiKey, options = {}) {
const systemOptions = {
thinking: options.modelOptions.thinking ?? anthropicSettings.thinking.default,
promptCache: options.modelOptions.promptCache ?? anthropicSettings.promptCache.default,
thinkingBudget: options.modelOptions.thinkingBudget ?? anthropicSettings.thinkingBudget.default,
};
for (let key in systemOptions) {
delete options.modelOptions[key];
}
const defaultOptions = {
model: anthropicSettings.model.default,
maxOutputTokens: anthropicSettings.maxOutputTokens.default,
Expand All @@ -29,7 +38,7 @@ function getLLMConfig(apiKey, options = {}) {
const mergedOptions = Object.assign(defaultOptions, options.modelOptions);

/** @type {AnthropicClientOptions} */
const requestOptions = {
let requestOptions = {
apiKey,
model: mergedOptions.model,
stream: mergedOptions.stream,
Expand All @@ -42,6 +51,13 @@ function getLLMConfig(apiKey, options = {}) {
clientOptions: {},
};

const supportsCacheControl =
systemOptions.promptCache === true && checkPromptCacheSupport(requestOptions.model);
const headers = getClaudeHeaders(requestOptions.model, supportsCacheControl);
if (headers) {
requestOptions.clientOptions.defaultHeaders = headers;
}

if (options.proxy) {
requestOptions.clientOptions.httpAgent = new HttpsProxyAgent(options.proxy);
}
Expand Down
Loading
Loading