From cf403302a93f8a058c96b3bf6cee42ca8337cb9d Mon Sep 17 00:00:00 2001
From: Han Xiao <han.xiao@jina.ai>
Date: Tue, 25 Feb 2025 15:56:16 +0800
Subject: [PATCH] refactor: schemas

---
 src/agent.ts                |  9 ++--
 src/app.ts                  |  2 +-
 src/tools/evaluator.ts      | 10 +++--
 src/utils/action-tracker.ts | 18 +++++---
 src/utils/i18n.json         | 86 +++++++++++++++++++++++++++++++++++++
 src/utils/schemas.ts        |  2 +-
 src/utils/text-tools.ts     | 36 ++++++++++++++++
 7 files changed, 145 insertions(+), 18 deletions(-)
 create mode 100644 src/utils/i18n.json
diff --git a/src/agent.ts b/src/agent.ts
index ca9792ae..51422339 100644
--- a/src/agent.ts
+++ b/src/agent.ts
@@ -181,7 +181,6 @@ ${allKeywords.join('\n')}
     actionSections.push(`
 <action-answer>
 - For greetings, casual conversation, or general knowledge questions, answer directly without references.
-- If the question is clearly within your knowledge cutoff (i.e. Aug. 2024) and requires no up-to-date knowledge to get better answer, then provide a confident answer directly.
 - For all other questions, provide a verified answer with references. Each reference must include exactQuote and url.
 - If uncertain, use <action-reflect>
 </action-answer>
@@ -356,11 +355,11 @@ export async function getResponse(question?: string,
         return {
           exactQuote: ref.exactQuote,
           title: allURLs[ref.url]?.title,
-          url: normalizeUrl(ref.url)
+          url: ref.url ? normalizeUrl(ref.url) : ''
         }
       });
 
-      context.actionTracker.trackThink(`But wait, let me evaluate the answer first.`)
+      context.actionTracker.trackThink('eval_first', SchemaGen.languageCode)
 
       const evaluation = await evaluateAnswer(currentQuestion, thisStep,
         evaluationMetrics[currentQuestion],
@@ -506,7 +505,7 @@ But then you realized you have asked them before. You decided to to think out of
       if (keywordsQueries.length > 0) {
 
 
-        context.actionTracker.trackThink(`Let me search for "${keywordsQueries.join(', ')}" to gather more information.`)
+        context.actionTracker.trackThink('search_for', SchemaGen.languageCode, {keywords: keywordsQueries.join(', ')});
         for (const query of keywordsQueries) {
           console.log(`Search query: ${query}`);
 
@@ -594,7 +593,7 @@ You decided to think out of the box or cut from a completely different angle.
       const uniqueURLs = thisStep.URLTargets;
 
       if (uniqueURLs.length > 0) {
-        context.actionTracker.trackThink(`Let me read ${uniqueURLs.join(', ')} to gather more information.`);
+        context.actionTracker.trackThink('read_for', SchemaGen.languageCode, {urls: uniqueURLs.join(', ')});
 
         const urlResults = await Promise.all(
           uniqueURLs.map(async url => {
diff --git a/src/app.ts b/src/app.ts
index d16a24e0..87d90acd 100644
--- a/src/app.ts
+++ b/src/app.ts
@@ -132,7 +132,7 @@ function calculateDelay(chunk: string, burstMode: boolean): number {
 
   // Special handling for CJK characters
   if (/^[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]$/.test(chunk)) {
-    return Math.random() * 50 + 10; // Longer delay for individual CJK characters
+    return Math.random() * 25 + 10; // Longer delay for individual CJK characters
   }
 
   // Base delay calculation
diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts
index 0526ed62..9fad9869 100644
--- a/src/tools/evaluator.ts
+++ b/src/tools/evaluator.ts
@@ -474,7 +474,9 @@ Question: "请解释赤壁之战的历史背景、主要参与者以及战略意
 </examples>
 
 Now evaluate this question:
-Question: ${question}`;
+Question: ${question}
+
+NOTE: "think" field should be in the same language as the question`;
 }
 
 const TOOL_NAME = 'evaluator';
@@ -560,7 +562,7 @@ export async function evaluateAnswer(
       case 'attribution': {
         // Safely handle references and ensure we have content
 
-        const allKnowledge = await fetchSourceContent(uniqueNewURLs, trackers);
+        const allKnowledge = await fetchSourceContent(uniqueNewURLs, trackers, schemaGen);
         visitedURLs.push(...uniqueNewURLs);
 
         if (allKnowledge.trim().length === 0) {
@@ -610,9 +612,9 @@ export async function evaluateAnswer(
 }
 
 // Helper function to fetch and combine source content
-async function fetchSourceContent(urls: string[], trackers: TrackerContext): Promise<string> {
+async function fetchSourceContent(urls: string[], trackers: TrackerContext, schemaGen: Schemas): Promise<string> {
   if (!urls.length) return '';
-  trackers.actionTracker.trackThink('Let me fetch the source content to verify the answer.');
+  trackers.actionTracker.trackThink('read_for_verify', schemaGen.languageCode);
   try {
     const results = await Promise.all(
       urls.map(async (url) => {
diff --git a/src/utils/action-tracker.ts b/src/utils/action-tracker.ts
index c00fa405..2e0b77a6 100644
--- a/src/utils/action-tracker.ts
+++ b/src/utils/action-tracker.ts
@@ -1,5 +1,6 @@
-import { EventEmitter } from 'events';
-import { StepAction } from '../types';
+import {EventEmitter} from 'events';
+import {StepAction} from '../types';
+import {getI18nText} from "./text-tools";
 
 interface ActionState {
   thisStep: StepAction;
@@ -8,6 +9,7 @@ interface ActionState {
   totalStep: number;
 }
 
+
 export class ActionTracker extends EventEmitter {
   private state: ActionState = {
     thisStep: {action: 'answer', answer: '', references: [], think: ''},
@@ -17,18 +19,20 @@ export class ActionTracker extends EventEmitter {
   };
 
   trackAction(newState: Partial<ActionState>) {
-    this.state = { ...this.state, ...newState };
+    this.state = {...this.state, ...newState};
     this.emit('action', this.state.thisStep);
   }
 
-  trackThink(think: string) {
-    // only update the think field of the current state
-    this.state = { ...this.state, thisStep: { ...this.state.thisStep, think } };
+  // Sets the current step's think text; when lang is given, think is an i18n key resolved via getI18nText.
+  trackThink(think: string, lang?: string, params = {}) {
+    const message = lang ? getI18nText(think, lang, params) : think;
+    // Only the think field is replaced; the rest of the state is copied over before 'action' is emitted.
+    this.state = {...this.state, thisStep: {...this.state.thisStep, think: message}};
     this.emit('action', this.state.thisStep);
   }
 
   getState(): ActionState {
-    return { ...this.state };
+    return {...this.state};
   }
 
   reset() {
diff --git a/src/utils/i18n.json b/src/utils/i18n.json
new file mode 100644
index 00000000..ccf99315
--- /dev/null
+++ b/src/utils/i18n.json
@@ -0,0 +1,86 @@
+{
+  "en": {
+    "eval_first": "But wait, let me evaluate the answer first.",
+    "search_for": "Let me search for ${keywords} to gather more information.",
+    "read_for": "Let me read ${urls} to gather more information.",
+    "read_for_verify": "Let me fetch the source content to verify the answer."
+  },
+  "zh-CN": {
+    "eval_first": "等等，让我先自己评估一下答案。",
+    "search_for": "让我搜索${keywords}来获取更多信息。",
+    "read_for": "让我读取网页${urls}来获取更多信息。",
+    "read_for_verify": "让我读取源网页内容来验证答案。"
+  },
+  "zh-TW": {
+    "eval_first": "等等，讓我先評估一下答案。",
+    "search_for": "讓我搜尋${keywords}來獲取更多資訊。",
+    "read_for": "讓我閱讀${urls}來獲取更多資訊。",
+    "read_for_verify": "讓我獲取源內容來驗證答案。"
+  },
+  "ja": {
+    "eval_first": "ちょっと待って、まず答えを評価します。",
+    "search_for": "キーワード${keywords}で検索して、情報を集めます。",
+    "read_for": "URL${urls}を読んで、情報を集めます。",
+    "read_for_verify": "答えを確認するために、ソースコンテンツを取得します。"
+  },
+  "ko": {
+    "eval_first": "잠시만요, 먼저 답변을 평가해 보겠습니다.",
+    "search_for": "키워드 ${keywords}로 검색하여 더 많은 정보를 수집하겠습니다.",
+    "read_for": "URL ${urls}을 읽어 더 많은 정보를 수집하겠습니다.",
+    "read_for_verify": "답변을 확인하기 위해 소스 콘텐츠를 가져오겠습니다."
+  },
+  "fr": {
+    "eval_first": "Un instant, je vais d'abord évaluer la réponse.",
+    "search_for": "Je vais rechercher ${keywords} pour obtenir plus d'informations.",
+    "read_for": "Je vais lire ${urls} pour obtenir plus d'informations.",
+    "read_for_verify": "Je vais récupérer le contenu source pour vérifier la réponse."
+  },
+  "de": {
+    "eval_first": "Einen Moment, ich werde die Antwort zuerst evaluieren.",
+    "search_for": "Ich werde nach ${keywords} suchen, um weitere Informationen zu sammeln.",
+    "read_for": "Ich werde ${urls} lesen, um weitere Informationen zu sammeln.",
+    "read_for_verify": "Ich werde den Quellinhalt abrufen, um die Antwort zu überprüfen."
+  },
+  "es": {
+    "eval_first": "Un momento, voy a evaluar la respuesta primero.",
+    "search_for": "Voy a buscar ${keywords} para recopilar más información.",
+    "read_for": "Voy a leer ${urls} para recopilar más información.",
+    "read_for_verify": "Voy a obtener el contenido fuente para verificar la respuesta."
+  },
+  "it": {
+    "eval_first": "Un attimo, valuterò prima la risposta.",
+    "search_for": "Cercherò ${keywords} per raccogliere ulteriori informazioni.",
+    "read_for": "Leggerò ${urls} per raccogliere ulteriori informazioni.",
+    "read_for_verify": "Recupererò il contenuto sorgente per verificare la risposta."
+  },
+  "pt": {
+    "eval_first": "Um momento, vou avaliar a resposta primeiro.",
+    "search_for": "Vou pesquisar ${keywords} para reunir mais informações.",
+    "read_for": "Vou ler ${urls} para reunir mais informações.",
+    "read_for_verify": "Vou buscar o conteúdo da fonte para verificar a resposta."
+  },
+  "ru": {
+    "eval_first": "Подождите, я сначала оценю ответ.",
+    "search_for": "Дайте мне поискать ${keywords} для сбора дополнительной информации.",
+    "read_for": "Дайте мне прочитать ${urls} для сбора дополнительной информации.",
+    "read_for_verify": "Дайте мне получить исходный контент для проверки ответа."
+  },
+  "ar": {
+    "eval_first": "لكن انتظر، دعني أقوم بتقييم الإجابة أولاً.",
+    "search_for": "دعني أبحث عن ${keywords} لجمع المزيد من المعلومات.",
+    "read_for": "دعني أقرأ ${urls} لجمع المزيد من المعلومات.",
+    "read_for_verify": "دعني أحضر محتوى المصدر للتحقق من الإجابة."
+  },
+  "nl": {
+    "eval_first": "Een moment, ik zal het antwoord eerst evalueren.",
+    "search_for": "Ik zal zoeken naar ${keywords} om meer informatie te verzamelen.",
+    "read_for": "Ik zal ${urls} lezen om meer informatie te verzamelen.",
+    "read_for_verify": "Ik zal de broninhoud ophalen om het antwoord te verifiëren."
+  },
+  "zh": {
+    "eval_first": "等等，让我先评估一下答案。",
+    "search_for": "让我搜索${keywords}来获取更多信息。",
+    "read_for": "让我阅读${urls}来获取更多信息。",
+    "read_for_verify": "让我获取源内容来验证答案。"
+  }
+}
\ No newline at end of file
diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts
index 796e433a..b14b4fdf 100644
--- a/src/utils/schemas.ts
+++ b/src/utils/schemas.ts
@@ -61,7 +61,7 @@ ${question}`;
 
 export class Schemas {
   private languageStyle: string = 'formal English';
-  private languageCode: string = 'en';
+  public languageCode: string = 'en';
 
 
   constructor(query: string) {
diff --git a/src/utils/text-tools.ts b/src/utils/text-tools.ts
index c045b560..3e3a2547 100644
--- a/src/utils/text-tools.ts
+++ b/src/utils/text-tools.ts
@@ -125,3 +125,39 @@ export function chooseK(a: string[], k: number) {
 export function removeHTMLtags(text: string) {
   return text.replace(/<[^>]*>?/gm, '');
 }
+
+
+/**
+ * Looks up the text stored under `key` for `lang` in `./i18n.json` and fills in its `${name}` placeholders.
+ * Falls back to English when `lang`, or the key within `lang`, is missing; if English lacks the key
+ * too, the key itself is returned. Every fallback is reported through console.error.
+ * @param key - Entry name inside a language table, e.g. `search_for`.
+ * @param lang - Language code used as a top-level key of the JSON file (defaults to `en`).
+ * @param params - Placeholder values; every `${name}` in the text is replaced by `params[name]`.
+ * @returns The localized, interpolated text, or `key` when no translation exists.
+ */
+export function getI18nText(key: string, lang = 'en', params: Record<string, string> = {}): string {
+  const i18nData = require('./i18n.json');
+  // Own-property checks keep inherited names such as "constructor" from passing as a language or key.
+  const has = (obj: object, prop: string) => Object.prototype.hasOwnProperty.call(obj, prop);
+  if (!has(i18nData, lang)) {
+    console.error(`Language '${lang}' not found, falling back to English.`);
+    lang = 'en';
+  }
+
+  let text = has(i18nData[lang], key) ? i18nData[lang][key] : undefined;
+  if (!text) {
+    console.error(`Key '${key}' not found for language '${lang}', falling back to English.`);
+    text = has(i18nData['en'], key) ? i18nData['en'][key] : undefined;
+    if (!text) {
+      console.error(`Key '${key}' not found for English either.`);
+      return key;
+    }
+  }
+  // split/join substitutes every occurrence and inserts the value verbatim; String.replace with a
+  // string pattern only touches the first match and expands `$&`, `$1`, `$$` found inside the value.
+  for (const name of Object.keys(params ?? {})) {
+    text = text.split('${' + name + '}').join(String(params[name]));
+  }
+  return text;
+}
\ No newline at end of file