Skip to content

Commit

Permalink
fix: multi-aspect
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxiao committed Feb 25, 2025
1 parent c02588a commit ad7e524
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 145 deletions.
55 changes: 24 additions & 31 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ import {zodToJsonSchema} from "zod-to-json-schema";
import {ObjectGeneratorSafe} from "./utils/safe-generator";
import {CodeSandbox} from "./tools/code-sandbox";
import {serperSearch} from './tools/serper-search';
import {normalizeUrl} from "./utils/url-tools";
import {getUnvisitedURLs, normalizeUrl} from "./utils/url-tools";
import {buildMdFromAnswer, chooseK, removeExtraLineBreaks, removeHTMLtags} from "./utils/text-tools";

async function sleep(ms: number) {
const seconds = Math.ceil(ms / 1000);
Expand All @@ -40,8 +41,9 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole

if (allowSearch) {
actions.push("search");
properties.searchQuery = z.string().max(30)
.describe(`Required when action='search'. Must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Write the query in the language that potential answers might be written in, then in ${languageStyle}.`).optional();
properties.searchRequests = z.array(
z.string().max(30)
.describe(`A natual language search request in ${languageStyle}. Based on the deep intention behind the original question and the expected answer format.`)).describe(`Required when action='search'. Always prefer a single request, only add another request if the original question covers multiple aspects or elements and one search request is definitely not enough, each request focus on one specific aspect of the original question. Minimize mutual information between each request. Maximum ${MAX_QUERIES_PER_STEP} search requests.`).max(MAX_QUERIES_PER_STEP);
}

if (allowCoding) {
Expand Down Expand Up @@ -85,11 +87,7 @@ function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boole

}

/**
 * Lists the search results whose URL has not been visited yet.
 *
 * @param allURLs - Search results keyed by their URL.
 * @param visitedURLs - URLs that must be excluded from the result.
 * @returns The values of `allURLs` whose key does not appear in
 *          `visitedURLs`, in the iteration order of `Object.entries`.
 */
function getUnvisitedURLs(allURLs: Record<string, SearchResult>, visitedURLs: string[]): SearchResult[] {
  const unvisited: SearchResult[] = [];
  for (const [url, result] of Object.entries(allURLs)) {
    if (visitedURLs.includes(url)) {
      continue;
    }
    unvisited.push(result);
  }
  return unvisited;
}


function getPrompt(
context?: string[],
Expand Down Expand Up @@ -226,14 +224,13 @@ ${urlList}
actionSections.push(`
<action-search>
- Use web search to find relevant information
- Choose optimal search queries and language based on the expected answer format
- Focus on one specific aspect of the original question
- Suggest unique keywords and alternative search angles
- Build a search request based on the deep intention behind the original question and the expected answer format
- Always prefer a single search request, only add another request if the original question covers multiple aspects or elements and one query is not enough, each request focus on one specific aspect of the original question
${allKeywords?.length ? `
- Previous unsuccessful queries to avoid:
<bad-queries>
- Avoid those unsuccessful search requests and queries:
<bad-requests>
${allKeywords.join('\n')}
</bad-queries>
</bad-requests>
`.trim() : ''}
</action-search>
`);
Expand All @@ -243,7 +240,7 @@ ${allKeywords.join('\n')}
actionSections.push(`
<action-answer>
- For greetings, casual conversation, or general knowledge questions, answer directly without references.
- If the question is clearly within your knowledge cutoff (i.e. Aug. 2024), provide a confident answer directly.
- If the question is clearly within your knowledge cutoff (i.e. Aug. 2024) and requires no up-to-date knowledge to get better answer, then provide a confident answer directly.
- For all other questions, provide a verified answer with references. Each reference must include exactQuote and url.
- If uncertain, use <action-reflect>
</action-answer>
Expand Down Expand Up @@ -292,24 +289,16 @@ ${actionSections.join('\n\n')}
return removeExtraLineBreaks(sections.join('\n\n'));
}

/**
 * Collapses every run of two or more consecutive newline characters into
 * exactly two, so the text never contains more than one blank line in a row.
 *
 * @param text - The text to normalize.
 * @returns The text with runs of line breaks squeezed to `\n\n`.
 */
const removeExtraLineBreaks = (text: string) => text.replace(/\n{2,}/gm, '\n\n');


const allContext: StepAction[] = []; // all steps in the current session, including those that lead to wrong results

/**
 * Appends a step to the module-level `allContext` history.
 *
 * @param step - The step to record; it is stored as-is, without validation.
 */
function updateContext(step: any) {
allContext.push(step)
}

/**
 * Randomly samples up to `k` elements from `a` without repetition.
 *
 * Uses a partial Fisher–Yates shuffle on a copy of the input, so the caller's
 * array is never reordered and every element is equally likely to be picked
 * (sorting with a random comparator is biased and mutates the input).
 *
 * @param a - The candidate strings; left untouched.
 * @param k - Maximum number of elements to pick. Values below zero are treated
 *            as zero; values above `a.length` return all elements.
 * @returns A new array of `min(k, a.length)` elements in random order
 *          (randomness comes from `Math.random`).
 */
function chooseK(a: string[], k: number) {
  const pool = [...a];
  const count = Math.max(0, Math.min(Math.trunc(k), pool.length));

  // Only the first `count` positions need to be shuffled into place.
  for (let i = 0; i < count; i++) {
    const j = i + Math.floor(Math.random() * (pool.length - i));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }

  return pool.slice(0, count);
}

/**
 * Strips HTML-like tags from a string.
 *
 * Anything from a `<` up to and including the next `>` is removed. When a `<`
 * has no closing `>`, everything from that `<` to the end of the text is
 * removed as well.
 *
 * @param text - The text that may contain markup.
 * @returns The text with all matched tag spans deleted.
 */
function removeHTMLtags(text: string) {
  const tagPattern = /<[^>]*>?/gm;
  const stripped = text.replace(tagPattern, '');
  return stripped;
}



export async function getResponse(question?: string,
Expand Down Expand Up @@ -560,13 +549,15 @@ But then you realized you have asked them before. You decided to to think out of

allowReflect = false;
}
} else if (thisStep.action === 'search' && thisStep.searchQuery) {
} else if (thisStep.action === 'search' && thisStep.searchRequests) {
// dedup search requests
thisStep.searchRequests = chooseK((await dedupQueries(thisStep.searchRequests, [], context.tokenTracker)).unique_queries, MAX_QUERIES_PER_STEP);

// rewrite queries
let {queries: keywordsQueries} = await rewriteQuery(thisStep, context);
const oldKeywords = keywordsQueries;
// avoid existing searched queries
const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords, context.tokenTracker);
keywordsQueries = chooseK(dedupedQueries, MAX_QUERIES_PER_STEP);
keywordsQueries = chooseK((await dedupQueries(keywordsQueries, allKeywords, context.tokenTracker)).unique_queries, MAX_QUERIES_PER_STEP);

let anyResult = false;

if (keywordsQueries.length > 0) {
Expand Down Expand Up @@ -639,7 +630,7 @@ You found quite some information and add them to your URL list and **visit** the
if (!anyResult || !keywordsQueries?.length) {
diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: ${oldKeywords.join(', ')}.
In particular, you tried to search for the following keywords: ${keywordsQueries.join(', ')}.
But then you realized you have already searched for these keywords before, no new information is returned.
You decided to think out of the box or cut from a completely different angle.
`);
Expand Down Expand Up @@ -800,14 +791,16 @@ But unfortunately, you failed to solve the issue. You need to think out of the b
(thisStep as AnswerAction).isFinal = true;
context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
}

(thisStep as AnswerAction).mdAnswer = buildMdFromAnswer((thisStep as AnswerAction))
console.log(thisStep)

await storeContext(system, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
return {
result: thisStep,
context,
visitedURLs: [...new Set([...visitedURLs, ...Object.keys(allURLs)])],
readURLs: visitedURLs
readURLs: visitedURLs,
};

}
Expand Down
107 changes: 8 additions & 99 deletions src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,102 +28,6 @@ app.get('/health', (req, res) => {
res.json({status: 'ok'});
});

/**
 * Renders an answer as Markdown, appending its references as footnote
 * definitions and repairing footnote markers (`[^n]`) where needed.
 *
 * Behavior by case:
 * - No references: footnote markers are stripped from the answer text.
 * - References but no markers: one marker per reference is appended after the
 *   answer, followed by the reference list.
 * - Markers that look wrong (all identical while matching the reference
 *   count, or pointing past the last reference): markers are renumbered
 *   sequentially in order of appearance.
 * - More references than markers: markers for the unused references are
 *   appended after the answer.
 * - Otherwise: the answer is emitted unchanged, followed by the references.
 *
 * @param answer - The answer action holding the text and its references.
 * @returns The Markdown string (trimmed when references are present).
 */
function buildMdFromAnswer(answer: AnswerAction) {
  const footnoteRegex = /\[\^(\d+)]/g;

  // Helper function to format references as `[^n]: quote [domain](url)` lines.
  const formatReferences = (refs: typeof answer.references) => {
    return refs.map((ref, i) => {
      // Replace anything that is not a letter, digit or whitespace so the
      // quote cannot break the surrounding Markdown, then squeeze whitespace.
      const cleanQuote = ref.exactQuote
        .replace(/[^\p{L}\p{N}\s]/gu, ' ')
        .replace(/\s+/g, ' ');

      const citation = `[^${i + 1}]: ${cleanQuote}`;

      if (!ref.url?.startsWith('http')) return citation;

      // A string can start with "http" and still be unparsable (e.g. "http://");
      // fall back to the plain citation instead of throwing for the whole answer.
      let hostname: string;
      try {
        hostname = new URL(ref.url).hostname;
      } catch {
        return citation;
      }

      // Only drop a leading "www." label, never a "www." found mid-hostname.
      const domainName = hostname.replace(/^www\./, '');
      return `${citation} [${domainName}](${ref.url})`;
    }).join('\n\n');
  };

  // First case: no references - remove any footnote citations
  if (!answer.references?.length) {
    return answer.answer.replace(footnoteRegex, '');
  }

  // Extract all footnote numbers (as strings) from the answer
  const footnotes: string[] = [];
  let match: RegExpExecArray | null;
  while ((match = footnoteRegex.exec(answer.answer)) !== null) {
    footnotes.push(match[1]);
  }

  // No footnotes in answer but we have references - append them at the end
  if (footnotes.length === 0) {
    const appendedCitations = Array.from(
      {length: answer.references.length},
      (_, i) => `[^${i + 1}]`
    ).join('');

    const references = formatReferences(answer.references);

    return `
${answer.answer}
${appendedCitations}
${references}
`.trim();
  }

  // Check if correction is needed
  const allSame = footnotes.every(n => n === footnotes[0]);
  const needsCorrection =
    (footnotes.length === answer.references.length && allSame) ||
    (allSame && parseInt(footnotes[0]) > answer.references.length) ||
    (footnotes.length > 0 && footnotes.every(n => parseInt(n) > answer.references.length));

  // We have more references than footnotes: cite the unused ones after the answer
  if (answer.references.length > footnotes.length && !needsCorrection) {
    // Get the used indices
    const usedIndices = new Set(footnotes.map(n => parseInt(n)));

    // Create citations for unused references
    const unusedReferences = Array.from(
      {length: answer.references.length},
      (_, i) => !usedIndices.has(i + 1) ? `[^${i + 1}]` : ''
    ).join('');

    return `
${answer.answer}
${unusedReferences}
${formatReferences(answer.references)}
`.trim();
  }

  if (!needsCorrection) {
    return `
${answer.answer}
${formatReferences(answer.references)}
`.trim();
  }

  // Apply correction: sequentially number the footnotes
  let currentIndex = 0;
  const correctedAnswer = answer.answer.replace(footnoteRegex, () =>
    `[^${++currentIndex}]`
  );

  return `
${correctedAnswer}
${formatReferences(answer.references)}
`.trim();
}

async function* streamTextNaturally(text: string, streamingState: StreamingState) {
// Split text into chunks that preserve CJK characters, URLs, and regular words
const chunks = splitTextIntoChunks(text);
Expand Down Expand Up @@ -452,6 +356,7 @@ async function processQueue(streamingState: StreamingState, res: Response, reque

streamingState.processingQueue = false;
}

app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
// Check authentication only if secret is set
if (secret) {
Expand Down Expand Up @@ -559,13 +464,17 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
}

try {
const {result: finalStep, visitedURLs: visitedURLs, readURLs: readURLs} = await getResponse(undefined, tokenBudget, maxBadAttempts, context, body.messages)
const {
result: finalStep,
visitedURLs: visitedURLs,
readURLs: readURLs
} = await getResponse(undefined, tokenBudget, maxBadAttempts, context, body.messages)

const usage = context.tokenTracker.getTotalUsageSnakeCase();
if (body.stream) {
// Complete any ongoing streaming before sending final answer
await completeCurrentStreaming(streamingState, res, requestId, created, body.model);
const finalAnswer = buildMdFromAnswer(finalStep as AnswerAction);
const finalAnswer = (finalStep as AnswerAction).mdAnswer;
// Send closing think tag
const closeThinkChunk: ChatCompletionChunk = {
id: requestId,
Expand Down Expand Up @@ -613,7 +522,7 @@ app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
index: 0,
message: {
role: 'assistant',
content: finalStep.action === 'answer' ? buildMdFromAnswer(finalStep) : finalStep.think
content: finalStep.action === 'answer' ? (finalStep.mdAnswer || '') : finalStep.think
},
logprobs: null,
finish_reason: 'stop'
Expand Down
33 changes: 19 additions & 14 deletions src/tools/query-rewriter.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { z } from 'zod';
import {z} from 'zod';
import {SearchAction, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";

Expand All @@ -13,8 +13,7 @@ const responseSchema = z.object({
});



function getPrompt(action: SearchAction): string {
function getPrompt(query: string, think: string): string {
return `You are an expert search query generator with deep psychological understanding. You optimize user queries by extensively analyzing potential user intents and generating comprehensive search variations.
<rules>
Expand Down Expand Up @@ -185,8 +184,8 @@ queries: [
]
Now, process this query:
Input Query: ${action.searchQuery}
Intention: ${action.think}
Input Query: ${query}
Intention: ${think}
`;
}

Expand All @@ -195,17 +194,23 @@ const TOOL_NAME = 'queryRewriter';
export async function rewriteQuery(action: SearchAction, trackers?: TrackerContext): Promise<{ queries: string[] }> {
try {
const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const prompt = getPrompt(action);

const result = await generator.generateObject({
model: TOOL_NAME,
schema: responseSchema,
prompt,
const allQueries = [...action.searchRequests];

const queryPromises = action.searchRequests.map(async (req) => {
const prompt = getPrompt(req, action.think);
const result = await generator.generateObject({
model: TOOL_NAME,
schema: responseSchema,
prompt,
});
trackers?.actionTracker.trackThink(result.object.think);
return result.object.queries;
});

console.log(TOOL_NAME, result.object.queries);
trackers?.actionTracker.trackThink(result.object.think);
return { queries: result.object.queries };
const queryResults = await Promise.all(queryPromises);
queryResults.forEach(queries => allQueries.push(...queries));
console.log(TOOL_NAME, allQueries);
return { queries: allQueries };
} catch (error) {
console.error(`Error in ${TOOL_NAME}`, error);
throw error;
Expand Down
3 changes: 2 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ type BaseAction = {

export type SearchAction = BaseAction & {
action: "search";
searchQuery: string;
searchRequests: string[];
};

export type AnswerAction = BaseAction & {
Expand All @@ -19,6 +19,7 @@ export type AnswerAction = BaseAction & {
url: string;
}>;
isFinal?: boolean;
mdAnswer?: string;
};


Expand Down
Loading

0 comments on commit ad7e524

Please sign in to comment.