Skip to content

Commit

Permalink
add web search
Browse files Browse the repository at this point in the history
  • Loading branch information
sanbuphy committed Dec 11, 2024
1 parent 70af7d4 commit d703120
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 23 deletions.
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ OPENAI_API_BASE="https://api.siliconflow.cn/v1"
OPENAI_API_MODEL='Qwen/Qwen2.5-7B-Instruct'
ZHIPUAI_API_KEY=
HF_TOKEN=
TAVILY_API_KEY=
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ logs/
build/*
tianji.egg-info/*
temp/*
temp_datasets/*
*.bin
*.pyc
test/knowledges/langchain/cache/
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ pip install -e .

为确保项目正常运行,**请在项目内新建`.env`文件,并在其中设置你的API密钥**,你可以根据下列例子写入对应的 key,即可成功运行调用,目前默认使用 [siliconflow](https://cloud.siliconflow.cn/models) 与 [ZhipuAI](https://bigmodel.cn/),你可以获取对应token即可使用。

当前 prompt demo 使用 ZhipuAI api,rag 与 agent demo 使用 siliconflow api,可以根据实际需要进行切换使用
当前 Prompt demo 使用 ZhipuAI api,rag 与 agent demo 使用 Siliconflow api,你可以填写这两者调用密钥,即可使用 tianji 的全部功能

```
OPENAI_API_KEY=
Expand All @@ -182,8 +182,11 @@ OPENAI_API_BASE=
ZHIPUAI_API_KEY=
OPENAI_API_MODEL=
HF_TOKEN=
TAVILY_API_KEY=
```

如果你想要结合 Agent 中的网络搜索工具给出更好的回答,你需要填写上述环境变量的 TAVILY_API_KEY 进行搜索请求,你可以在 [TAVILY 官网](https://app.tavily.com/home)获取体验免费密钥(个人免费额度)

### 运行

以下给出 prompt 以及 agent 的相关应用方式,在运行前请确保你已经新建`.env`文件:
Expand Down
4 changes: 2 additions & 2 deletions run/demo_agent_metagpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def initialize_sidebar(scenes, sharedData):
container_scene_attribute.write(st.session_state["scene_attr"])
st.button("Clear Chat History", on_click=lambda: on_btn_click(sharedData))
st.checkbox(
"启用网络搜索", value=st.session_state["enable_se"], key="check", on_change=flip
"启用网络搜索(确保填写密钥)", value=st.session_state["enable_se"], key="check", on_change=flip
)


Expand Down Expand Up @@ -189,7 +189,7 @@ async def main():

# 如果已开启网络搜索助手 agent ,运行 agent
if st.session_state["enable_se"] is True:
with st.spinner("SearcherAgent 运行中..."):
with st.spinner("启用搜索引擎,请稍等片刻... 如有报错,请检查密钥是否填写正确"):
await role_search.run(str(sharedData.message_list_for_agent))

sa_res1 = "生成的额外查询:" + str(sharedData.extra_query)
Expand Down
13 changes: 13 additions & 0 deletions test/agents/metagpt/test_WebSearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Tavily AI is the leading search engine optimized for LLMs
https://app.tavily.com/
"""
import os
from tavily import TavilyClient

tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

response = tavily_client.search("What is the weather in Shanghai?",max_results=10)

for url in response['results']:
print(url['url'])
9 changes: 4 additions & 5 deletions tianji/agents/metagpt_agents/answerBot/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@

from metagpt.actions import Action
from tianji.agents.metagpt_agents.utils.json_from import SharedDataSingleton
from tianji.agents.metagpt_agents.utils.agent_llm import ZhipuApi as LLMApi
from tianji.agents.metagpt_agents.utils.agent_llm import OpenaiApi as LLMApi
from tianji.agents.metagpt_agents.utils.helper_func import extract_single_type_attributes_and_examples, extract_attribute_descriptions, load_json

from metagpt.logs import logger
"""
回答助手 agent 所对应的 action。
"""


class AnswerQuestion(Action):
PROMPT_TEMPLATE: str = """
#Role:
Expand Down Expand Up @@ -56,6 +54,7 @@ async def run(self, instruction: str):
if "filtered_content" in item:
filtered_dict[index] = item["filtered_content"]

logger.info("AnswerQuestion 最后的回复 agent :scene_attributes scene_attributes_description")
prompt = self.PROMPT_TEMPLATE.format(
scene=scene,
scene_attributes=scene_attributes,
Expand All @@ -66,5 +65,5 @@ async def run(self, instruction: str):
else "",
)

rsp = await LLMApi()._aask(prompt=prompt, temperature=1.00)
rsp = await LLMApi()._aask(prompt=prompt, temperature=0.7)
return rsp
56 changes: 42 additions & 14 deletions tianji/agents/metagpt_agents/searcher/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import requests
from bs4 import BeautifulSoup
import re

from tavily import TavilyClient
"""
网络搜索助手 agent 所对应的 action。
"""
Expand Down Expand Up @@ -85,6 +85,7 @@ async def run(self, instruction: str):
raise Exception("Searcher agent failed to response")

ddgs = DDGS()
tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
class WebSearch(Action):
name: str = "WebSearch"

Expand All @@ -97,16 +98,26 @@ def search(query):
max_retry = 5
for attempt in range(max_retry):
try:
response = _call_ddgs(query)
response = _call_tavily(query)
result = _parse_response(response)
return result
except Exception as e:
time.sleep(random.randint(2, 5))
raise Exception(
"Failed to get search results from DuckDuckGo after retries."
)

def _call_tavily(query: str, **kwargs) -> dict:
try:
logger.info(f"_call_tavily 正在搜索{query},kwargs为{kwargs}")
response = tavily_client.search(query, max_results=5)
return response
except Exception as e:
raise Exception(f"_call_tavily 搜索{query}出错: {str(e)}")

def _call_ddgs(query: str, **kwargs) -> dict:
"""
TODO ddgs 容易触发202限制,等到后续优化
"""
max_retry = 5
for attempt in range(max_retry):
try:
Expand Down Expand Up @@ -138,14 +149,30 @@ def _parse_response(response: dict) -> dict:
raw_results = []
filtered_results = {}
count = 0
for item in response:
raw_results.append(
(
item["href"],
item["description"] if "description" in item else item["body"],
item["title"],

# 判断是否为 tavily 搜索引擎的结果
if isinstance(response, dict) and 'results' in response:
# tavily 搜索引擎结果解析
for item in response['results']:
raw_results.append(
(
item['url'],
item['content'],
item['title']
)
)
)
else:
# ddgs 搜索引擎结果解析
for item in response:
raw_results.append(
(
item["href"],
item["description"] if "description" in item else item["body"],
item["title"],
)
)

# 过滤和格式化结果
for url, snippet, title in raw_results:
if all(
domain not in url
Expand All @@ -160,7 +187,6 @@ def _parse_response(response: dict) -> dict:
if count >= 20: # 确保最多返回20个网页的内容,可自行根据大模型的 context length 更换合适的参数。
break
return filtered_results

logger.info(f"开始搜索{queries}")
with ThreadPoolExecutor() as executor:
future_to_query = {executor.submit(search, q): q for q in queries}
Expand Down Expand Up @@ -226,7 +252,7 @@ async def run(self, instruction: str):
for attempt in range(max_retry):
try:
rsp = await LLMApi()._aask(prompt=prompt, temperature=1.00)
logger.info("机器人分析需求\n" + rsp)
logger.info("机器人 SelectResult 分析需求\n" + rsp)
rsp = (
rsp.replace("```list", "")
.replace("```", "")
Expand Down Expand Up @@ -281,12 +307,13 @@ def fetch(url: str) -> Tuple[bool, str]:
else:
if web_success:
sharedData.search_results[select_id]["content"] = web_content[
:4096
:1024
]
return ""


class FilterSelectedResult(Action):
# 该处最好用长上下文的模型
PROMPT_TEMPLATE: str = """
#Role:
- 数据抽取小助手。
Expand Down Expand Up @@ -316,7 +343,7 @@ async def ask(result, extra_query):
search_results=result, extra_query=extra_query
)
rsp = await LLMApi()._aask(prompt=prompt, temperature=1.00)
logger.info("机器人分析需求\n" + rsp)
logger.info("机器人 FilterSelectedResult 分析需求\n" + rsp)
return rsp

def run_ask(result, extra_query):
Expand All @@ -342,6 +369,7 @@ def run_ask(result, extra_query):
try:
result = future.result()
except Exception as exc:
logger.error(f"FilterSelectedResult 提取{select_id}出错: {str(exc)}")
pass
else:
sharedData.search_results[select_id]["filtered_content"] = result
Expand Down
2 changes: 1 addition & 1 deletion tianji/agents/metagpt_agents/utils/agent_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def _aask(
response = self.client.chat.completions.create(
model=model,
messages=messages,
max_tokens=2048,
max_tokens=4096,
top_p=top_p,
temperature=temperature,
stream=stream,
Expand Down

0 comments on commit d703120

Please sign in to comment.