diff --git a/.vscode/launch.json b/.vscode/launch.json
index dd5303b..0363faf 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -22,7 +22,7 @@
             ],
             "program": "${workspaceFolder}/cli/translator.mjs",
             // "args": ["--plain-text", "你好"]
-            "args": ["--stream", "--to", "Chinese", "--input", "test/data/test_ja_small.srt", "-t", "0", "--batch-sizes", "[2, 3]", "--history-prompt-length", "3"],
+            "args": ["--stream", "--to", "Chinese", "--input", "test/data/test_ja.srt", "-t", "0", "--batch-sizes", "[5]", "--history-prompt-length", "1", "--experimental-use-full-context"],
             // "args": ["-i", "./test/data/test_cn_multiline.srt", "--stream", "--batch-sizes", "[5, 10]"],
             // "args": ["-i", "./test/data/test_cn_multiline.srt", "--stream", "--experimental-structured-mode", "--experimental-max_token", "4096", "--experimental-input-multiplier", "2", "--batch-sizes", "[5, 10]", "--experimental-fallback-model", "gpt-3.5-turbo-0613"],
             // "args": ["-i", "./test/data/test_cn_multiline.srt", "--experimental-structured-mode", "object", "--experimental-max_token", "4096", "--experimental-input-multiplier", "2", "--batch-sizes", "[5, 10]"],
diff --git a/cli/translator.mjs b/cli/translator.mjs
index 7982c5e..762f847 100644
--- a/cli/translator.mjs
+++ b/cli/translator.mjs
@@ -52,10 +52,12 @@ export function createInstance(args)
         .option("-f, --file ", "Deprecated: alias for -i, --input")
         .option("-s, --system-instruction ", "Override the prompt system instruction template `Translate ${from} to ${to}` with this plain text")
         .option("-p, --plain-text ", "Only translate this input plain text")
+        .option("--experimental-max_token ", "", parseInt, 0)
         .option("--experimental-input-multiplier ", "", parseInt, 0)
         .option("--experimental-fallback-model ", "Model to be used for refusal fallback")
         .addOption(new Option("--experimental-structured-mode [mode]", "Enable structured response formats as outlined by https://openai.com/index/introducing-structured-outputs-in-the-api/").choices(["array", "object"]))
+        .option("--experimental-use-full-context", "Use the full history, chunked by historyPromptLength, to work better with prompt caching.")
         .option("--initial-prompts ", "Initiation prompt messages before the translation request messages in JSON Array", JSON.parse, DefaultOptions.initialPrompts)
         .option("--no-use-moderator", "Don't use the OpenAI Moderation tool")
@@ -107,6 +109,7 @@ export function createInstance(args)
         ...(opts.experimentalMax_token && { max_token: opts.experimentalMax_token }),
         ...(opts.experimentalInputMultiplier && { inputMultiplier: opts.experimentalInputMultiplier }),
         ...(opts.experimentalFallbackModel && { fallbackModel: opts.experimentalFallbackModel }),
+        ...(opts.experimentalUseFullContext && { useFullContext: opts.experimentalUseFullContext }),
         ...(opts.logLevel && { logLevel: opts.logLevel })
     };
diff --git a/src/translator.mjs b/src/translator.mjs
index eda4c20..310e662 100644
--- a/src/translator.mjs
+++ b/src/translator.mjs
@@ -33,6 +33,8 @@ import { TranslationOutput } from './translatorOutput.mjs';
  * Enforce one to one line quantity input output matching
  * @property {number} historyPromptLength `10` \
  * Length of the prompt history to be retained and passed over to the next translation request in order to maintain some context.
+ * @property {boolean} useFullContext
+ * Use the full history, chunked by historyPromptLength, to work better with prompt caching.
  * @property {number[]} batchSizes `[10, 100]` \
  * The number of lines to include in each translation prompt, provided that they are estimated to within the token limit.
  * In case of mismatched output line quantities, this number will be decreased step-by-step according to the values in the array, ultimately reaching one.
@@ -55,6 +57,7 @@ export const DefaultOptions = {
     prefixNumber: true,
     lineMatching: true,
     historyPromptLength: 10,
+    useFullContext: false,
     batchSizes: [10, 100],
     structuredMode: false,
     max_token: 0,
@@ -465,10 +468,20 @@ export class Translator
     {
         if (this.workingProgress.length === 0 || this.options.historyPromptLength === 0)
         {
-            return
+            return;
+        }
+
+        let sliced;
+        if (this.options.useFullContext)
+        {
+            // Use the entire workingProgress if useFullContext is true
+            sliced = this.workingProgress;
+        } else
+        {
+            // Otherwise, slice based on historyPromptLength
+            sliced = this.workingProgress.slice(-this.options.historyPromptLength);
         }
-        const sliced = this.workingProgress.slice(-this.options.historyPromptLength)
-        const offset = this.workingProgress.length - this.options.historyPromptLength
+        const offset = this.workingProgress.length - sliced.length;
 
         /**
          * @param {string} text
@@ -476,33 +489,45 @@
          */
         const checkFlaggedMapper = (text, index) =>
         {
-            const id = index + (offset < 0 ? 0 : offset)
+            const id = index + (offset < 0 ? 0 : offset);
             if (this.moderatorFlags.has(id))
             {
                 // log.warn("[Translator]", "Prompt Flagged", id, text)
-                return this.preprocessLine("-", id, 0)
+                return this.preprocessLine("-", id, 0);
             }
-            return text
-        }
+            return text;
+        };
 
-        const checkedSource = sliced.map((x, i) => checkFlaggedMapper(x.source, i))
-        const checkedTransform = sliced.map((x, i) => checkFlaggedMapper(x.transform, i))
-        this.promptContext = this.getContext(checkedSource, checkedTransform)
+        const checkedSource = sliced.map((x, i) => checkFlaggedMapper(x.source, i));
+        const checkedTransform = sliced.map((x, i) => checkFlaggedMapper(x.transform, i));
+        this.promptContext = this.getContext(checkedSource, checkedTransform);
     }
-
     /**
      * @param {string[]} sourceLines
      * @param {string[]} transformLines
      */
     getContext(sourceLines, transformLines)
     {
-        return /** @type {import('openai').OpenAI.Chat.ChatCompletionMessage[]}*/ ([
-            { role: "user", content: this.getContextLines(sourceLines, "user") },
-            { role: "assistant", content: this.getContextLines(transformLines, "assistant") }
-        ])
+        const chunks = [];
+        const chunkSize = this.options.historyPromptLength;
+        for (let i = 0; i < sourceLines.length; i += chunkSize)
+        {
+            const sourceChunk = sourceLines.slice(i, i + chunkSize);
+            const transformChunk = transformLines.slice(i, i + chunkSize);
+            chunks.push({
+                role: "user",
+                content: this.getContextLines(sourceChunk, "user")
+            });
+            chunks.push({
+                role: "assistant",
+                content: this.getContextLines(transformChunk, "assistant")
+            });
+        }
+        return /** @type {import('openai').OpenAI.Chat.ChatCompletionMessage[]}*/ (chunks);
     }
+
     /**
      * @param {string[]} lines
      * @param {"user" | "assistant" } role
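For reference, a minimal standalone sketch of the chunking that the reworked `getContext` performs, not part of the patch itself: the helper name `chunkContext` is a hypothetical stand-in, and plain newline-joined content is used in place of the repository's `getContextLines` formatting. With `--experimental-use-full-context`, the entire `workingProgress` history flows through this chunking, so earlier user/assistant pairs remain byte-identical across successive requests and can be served from the provider's prompt cache.

```js
// Illustrative sketch only: chunkContext is a hypothetical stand-in, not the repository's API.
// It mirrors the loop added to getContext(): one user/assistant message pair per chunk of
// chunkSize lines, so the leading messages stay stable from one translation request to the next.
function chunkContext(sourceLines, transformLines, chunkSize)
{
    const messages = [];
    for (let i = 0; i < sourceLines.length; i += chunkSize)
    {
        messages.push({ role: "user", content: sourceLines.slice(i, i + chunkSize).join("\n") });
        messages.push({ role: "assistant", content: transformLines.slice(i, i + chunkSize).join("\n") });
    }
    return messages;
}

// With chunkSize = 2, six history lines become three stable user/assistant pairs.
console.log(chunkContext(["a", "b", "c", "d", "e", "f"], ["A", "B", "C", "D", "E", "F"], 2));
```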