Merge branch 'main' into feat/CAPTCHA
rubentalstra authored Mar 2, 2025
2 parents 564e85b + 7f6b32f commit 788094f
Showing 66 changed files with 6,319 additions and 2,090 deletions.
10 changes: 2 additions & 8 deletions .env.example
@@ -88,7 +88,7 @@ PROXY=
 #============#
 
 ANTHROPIC_API_KEY=user_provided
-# ANTHROPIC_MODELS=claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
+# ANTHROPIC_MODELS=claude-3-7-sonnet-latest,claude-3-7-sonnet-20250219,claude-3-5-haiku-20241022,claude-3-5-sonnet-20241022,claude-3-5-sonnet-latest,claude-3-5-sonnet-20240620,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307,claude-2.1,claude-2,claude-1.2,claude-1,claude-1-100k,claude-instant-1,claude-instant-1-100k
 # ANTHROPIC_REVERSE_PROXY=
 
 #============#
@@ -175,7 +175,7 @@ GOOGLE_KEY=user_provided
 #============#
 
 OPENAI_API_KEY=user_provided
-# OPENAI_MODELS=o1,o1-mini,o1-preview,gpt-4o,chatgpt-4o-latest,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k
+# OPENAI_MODELS=o1,o1-mini,o1-preview,gpt-4o,gpt-4.5-preview,chatgpt-4o-latest,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k
 
 DEBUG_OPENAI=false
 
@@ -209,12 +209,6 @@ ASSISTANTS_API_KEY=user_provided
 # More info, including how to enable use of Assistants with Azure here:
 # https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
 
-#============#
-# OpenRouter #
-#============#
-# !!!Warning: Use the variable above instead of this one. Using this one will override the OpenAI endpoint
-# OPENROUTER_API_KEY=
-
 #============#
 # Plugins #
 #============#
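The ANTHROPIC_MODELS and OPENAI_MODELS values above are plain comma-separated strings. As a rough, hypothetical sketch of how such a list is typically consumed (the variable name comes from .env.example; the parsing code itself is an assumption, not part of this commit):

    // Hypothetical sketch: turn a comma-separated model list into an array.
    // Only the ANTHROPIC_MODELS name comes from .env.example; the rest is illustrative.
    const raw = process.env.ANTHROPIC_MODELS ?? '';
    const models = raw
      .split(',')
      .map((name) => name.trim())
      .filter(Boolean);

    console.log(models[0]); // e.g. 'claude-3-7-sonnet-latest'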
5 changes: 4 additions & 1 deletion .github/workflows/backend-review.yml
@@ -61,4 +61,7 @@ jobs:
         run: cd api && npm run test:ci
 
       - name: Run librechat-data-provider unit tests
-        run: cd packages/data-provider && npm run test:ci
+        run: cd packages/data-provider && npm run test:ci
+
+      - name: Run librechat-mcp unit tests
+        run: cd packages/mcp && npm run test:ci
2 changes: 1 addition & 1 deletion README.md
@@ -81,7 +81,7 @@
 - [Fork Messages & Conversations](https://www.librechat.ai/docs/features/fork) for Advanced Context control
 
 - 💬 **Multimodal & File Interactions**:
-  - Upload and analyze images with Claude 3, GPT-4o, o1, Llama-Vision, and Gemini 📸
+  - Upload and analyze images with Claude 3, GPT-4.5, GPT-4o, o1, Llama-Vision, and Gemini 📸
   - Chat with Files using Custom Endpoints, OpenAI, Azure, Anthropic, AWS Bedrock, & Google 🗃️
 
 - 🌎 **Multilingual UI**:
143 changes: 87 additions & 56 deletions api/app/clients/AnthropicClient.js
@@ -7,7 +7,7 @@ const {
   getResponseSender,
   validateVisionModel,
 } = require('librechat-data-provider');
-const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { SplitStreamHandler: _Handler, GraphEvents } = require('@librechat/agents');
 const {
   truncateText,
   formatMessage,
@@ -16,16 +16,31 @@ const {
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
+const {
+  getClaudeHeaders,
+  configureReasoning,
+  checkPromptCacheSupport,
+} = require('~/server/services/Endpoints/anthropic/helpers');
 const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils');
 const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
+const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const Tokenizer = require('~/server/services/Tokenizer');
+const { logger, sendEvent } = require('~/config');
 const { sleep } = require('~/server/utils');
 const BaseClient = require('./BaseClient');
-const { logger } = require('~/config');
 
 const HUMAN_PROMPT = '\n\nHuman:';
 const AI_PROMPT = '\n\nAssistant:';
 
+class SplitStreamHandler extends _Handler {
+  getDeltaContent(chunk) {
+    return (chunk?.delta?.text ?? chunk?.completion) || '';
+  }
+  getReasoningDelta(chunk) {
+    return chunk?.delta?.thinking || '';
+  }
+}
+
 /** Helper function to introduce a delay before retrying */
 function delayBeforeRetry(attempts, baseDelay = 1000) {
   return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
@@ -68,6 +83,8 @@ class AnthropicClient extends BaseClient {
     /** The key for the usage object's output tokens
      * @type {string} */
     this.outputTokensKey = 'output_tokens';
+    /** @type {SplitStreamHandler | undefined} */
+    this.streamHandler;
   }
 
   setOptions(options) {
@@ -97,9 +114,10 @@
 
     const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
     this.isClaude3 = modelMatch.includes('claude-3');
-    this.isLegacyOutput = !modelMatch.includes('claude-3-5-sonnet');
-    this.supportsCacheControl =
-      this.options.promptCache && this.checkPromptCacheSupport(modelMatch);
+    this.isLegacyOutput = !(
+      /claude-3[-.]5-sonnet/.test(modelMatch) || /claude-3[-.]7/.test(modelMatch)
+    );
+    this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);
 
     if (
       this.isLegacyOutput &&
@@ -125,7 +143,7 @@
         this.options.endpointType ?? this.options.endpoint,
         this.options.endpointTokenConfig,
       ) ??
-      1500;
+      anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
     this.maxPromptTokens =
       this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
 
@@ -171,18 +189,9 @@
       options.baseURL = this.options.reverseProxyUrl;
     }
 
-    if (
-      this.supportsCacheControl &&
-      requestOptions?.model &&
-      requestOptions.model.includes('claude-3-5-sonnet')
-    ) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
-      };
-    } else if (this.supportsCacheControl) {
-      options.defaultHeaders = {
-        'anthropic-beta': 'prompt-caching-2024-07-31',
-      };
+    const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl);
+    if (headers) {
+      options.defaultHeaders = headers;
     }
 
     return new Anthropic(options);
@@ -668,29 +677,38 @@
    * @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
    */
   async createResponse(client, options, useMessages) {
-    return useMessages ?? this.useMessages
+    return (useMessages ?? this.useMessages)
       ? await client.messages.create(options)
       : await client.completions.create(options);
   }
 
+  getMessageMapMethod() {
+    /**
+     * @param {TMessage} msg
+     */
+    return (msg) => {
+      if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
+        msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
+      }
+
+      return msg;
+    };
+  }
+
   /**
-   * @param {string} modelName
-   * @returns {boolean}
+   * @param {string[]} [intermediateReply]
+   * @returns {string}
    */
-  checkPromptCacheSupport(modelName) {
-    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
-    if (modelMatch.includes('claude-3-5-sonnet-latest')) {
-      return false;
+  getStreamText(intermediateReply) {
+    if (!this.streamHandler) {
+      return intermediateReply?.join('') ?? '';
     }
-    if (
-      modelMatch === 'claude-3-5-sonnet' ||
-      modelMatch === 'claude-3-5-haiku' ||
-      modelMatch === 'claude-3-haiku' ||
-      modelMatch === 'claude-3-opus'
-    ) {
-      return true;
-    }
-    return false;
+
+    const reasoningText = this.streamHandler.reasoningTokens.join('');
+
+    const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';
+
+    return `${reasoningBlock}${this.streamHandler.tokens.join('')}`;
   }
 
   async sendCompletion(payload, { onProgress, abortController }) {
@@ -710,7 +728,6 @@
       user_id: this.user,
     };
 
-    let text = '';
     const {
       stream,
       model,
@@ -721,22 +738,34 @@
       topK: top_k,
     } = this.modelOptions;
 
-    const requestOptions = {
+    let requestOptions = {
       model,
       stream: stream || true,
       stop_sequences,
       temperature,
       metadata,
-      top_p,
-      top_k,
     };
 
     if (this.useMessages) {
       requestOptions.messages = payload;
-      requestOptions.max_tokens = maxOutputTokens || legacy.maxOutputTokens.default;
+      requestOptions.max_tokens =
+        maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model);
     } else {
       requestOptions.prompt = payload;
-      requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
+      requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default;
     }
 
+    requestOptions = configureReasoning(requestOptions, {
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
+    });
+
+    if (!/claude-3[-.]7/.test(model)) {
+      requestOptions.top_p = top_p;
+      requestOptions.top_k = top_k;
+    } else if (requestOptions.thinking == null) {
+      requestOptions.topP = top_p;
+      requestOptions.topK = top_k;
+    }
+
     if (this.systemMessage && this.supportsCacheControl === true) {
@@ -756,13 +785,17 @@
     }
 
     logger.debug('[AnthropicClient]', { ...requestOptions });
+    this.streamHandler = new SplitStreamHandler({
+      accumulate: true,
+      runId: this.responseMessageId,
+      handlers: {
+        [GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
+        [GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
+        [GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
+      },
+    });
 
-    const handleChunk = (currentChunk) => {
-      if (currentChunk) {
-        text += currentChunk;
-        onProgress(currentChunk);
-      }
-    };
+    let intermediateReply = this.streamHandler.tokens;
 
     const maxRetries = 3;
     const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
@@ -783,22 +816,15 @@
         });
 
         for await (const completion of response) {
-          // Handle each completion as before
           const type = completion?.type ?? '';
           if (tokenEventTypes.has(type)) {
            logger.debug(`[AnthropicClient] ${type}`, completion);
            this[type] = completion;
          }
-          if (completion?.delta?.text) {
-            handleChunk(completion.delta.text);
-          } else if (completion.completion) {
-            handleChunk(completion.completion);
-          }
-
+          this.streamHandler.handle(completion);
           await sleep(streamRate);
         }
 
-        // Successful processing, exit loop
         break;
       } catch (error) {
         attempts += 1;
@@ -808,6 +834,10 @@
 
         if (attempts < maxRetries) {
           await delayBeforeRetry(attempts, 350);
+        } else if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
+          return this.getStreamText();
+        } else if (intermediateReply.length > 0) {
+          return this.getStreamText(intermediateReply);
         } else {
           throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`);
}
Expand All @@ -823,8 +853,7 @@ class AnthropicClient extends BaseClient {
}

await processResponse.bind(this)();

return text.trim();
return this.getStreamText(intermediateReply);
}

@@ -834,6 +863,8 @@ getSaveOptions() {
       promptPrefix: this.options.promptPrefix,
       modelLabel: this.options.modelLabel,
       promptCache: this.options.promptCache,
+      thinking: this.options.thinking,
+      thinkingBudget: this.options.thinkingBudget,
       resendFiles: this.options.resendFiles,
       iconURL: this.options.iconURL,
       greeting: this.options.greeting,
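The core of this refactor is the SplitStreamHandler subclass: ordinary completion text and extended-thinking deltas accumulate in separate buffers, and getStreamText() stitches them back together with the reasoning wrapped in a :::thinking block. Below is a minimal, self-contained sketch of that flow in plain Node.js, independent of @librechat/agents; the MiniSplitStreamHandler name and the sample chunks are illustrative assumptions modeled on the chunk shapes handled in the diff above:

    // Minimal sketch of the split-stream idea: text deltas and reasoning
    // ("thinking") deltas accumulate separately, then reassemble at the end.
    class MiniSplitStreamHandler {
      constructor() {
        this.tokens = [];
        this.reasoningTokens = [];
      }
      // Messages API chunks carry delta.text; the legacy Completions API carries completion.
      getDeltaContent(chunk) {
        return (chunk?.delta?.text ?? chunk?.completion) || '';
      }
      // Extended-thinking chunks carry delta.thinking.
      getReasoningDelta(chunk) {
        return chunk?.delta?.thinking || '';
      }
      handle(chunk) {
        const text = this.getDeltaContent(chunk);
        if (text) {
          this.tokens.push(text);
        }
        const reasoning = this.getReasoningDelta(chunk);
        if (reasoning) {
          this.reasoningTokens.push(reasoning);
        }
      }
      // Mirrors getStreamText: prefix the reply with a :::thinking block when present.
      getText() {
        const reasoningText = this.reasoningTokens.join('');
        const block = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : '';
        return `${block}${this.tokens.join('')}`;
      }
    }

    const handler = new MiniSplitStreamHandler();
    handler.handle({ delta: { thinking: 'Summing 20 and 22…' } });
    handler.handle({ delta: { text: 'The answer is 42.' } });
    console.log(handler.getText());
    // :::thinking
    // Summing 20 and 22…
    // :::
    // The answer is 42.

Note that the new getMessageMapMethod() strips that same :::thinking block from prior messages, which suggests reasoning is shown to the user but not replayed to the model on later turns.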