Agent tool support (intel#134)
* add test files for openai_tools_agent

Signed-off-by: Xue, Chendi <[email protected]>

* complete adding support for tools

Signed-off-by: Xue, Chendi <[email protected]>

* Delete my_app directory to bring CI back

Signed-off-by: jiafu zhang <[email protected]>

* Add HTTP-based test for agent tool

Signed-off-by: Xue, Chendi <[email protected]>

* Update llm_on_ray/inference/api_openai_backend/router_app.py

Co-authored-by: Carson Wang <[email protected]>
Signed-off-by: Chendi.Xue <[email protected]>

* remove ref app

Signed-off-by: Xue, Chendi <[email protected]>

* update unit tests

Signed-off-by: Xue, Chendi <[email protected]>

---------

Signed-off-by: Xue, Chendi <[email protected]>
Signed-off-by: jiafu zhang <[email protected]>
Signed-off-by: Chendi.Xue <[email protected]>
Co-authored-by: jiafu zhang <[email protected]>
Co-authored-by: Carson Wang <[email protected]>
3 people authored Mar 26, 2024
1 parent aa2d08e commit 6d33b49
Showing 14 changed files with 795 additions and 41 deletions.
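
This commit adds tool (function-call) handling to the OpenAI-compatible router and an HTTP-based test for it. As a rough illustration of the kind of request such a test issues, the sketch below posts a chat completion with a tools schema to a locally served model; the endpoint URL, model name, and schema are assumptions drawn from the example files in this diff, not the exact test code.

import requests

# Assumed local llm_on_ray OpenAI-compatible endpoint; adjust to your deployment
base_url = "http://localhost:8000/v1"

payload = {
    "model": "llama-2-7b-chat-hf",  # assumed model name, matching the CI matrix entry below
    "messages": [{"role": "user", "content": "What's the weather like in Boston today?"}],
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ],
    "tool_choice": "auto",
}

response = requests.post(f"{base_url}/chat/completions", json=payload, timeout=60)
response.raise_for_status()
# The assistant message should contain either text content or a tool_calls entry
print(response.json()["choices"][0]["message"])
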
8 changes: 8 additions & 0 deletions .github/workflows/workflow_inference.yml
@@ -189,6 +189,14 @@ jobs:
            docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests.py --model_name ${{ matrix.model }}"
          fi
      - name: Run Agent tool Inference Test with REST API
        run: |
          TARGET=${{steps.target.outputs.target}}
          if [[ ${{ matrix.model }} == "llama-2-7b-chat-hf" ]]; then
            docker exec "${TARGET}" bash -c "llm_on_ray-serve --models ${{ matrix.model }}"
            docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests_tool.py --model_name ${{ matrix.model }}"
          fi
      - name: Stop Ray
        run: |
          TARGET=${{steps.target.outputs.target}}
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,2 +1,3 @@
# with [tools.setuptools] in pyproject.toml, the configs below work in both baremetal and container
include inference/**/*.yaml
include inference/**/*.jinja
@@ -0,0 +1,129 @@
#
# Copyright 2023 The LLM-on-Ray Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os

from langchain_openai import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler, StdOutCallbackHandler
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain import hub

parser = argparse.ArgumentParser(
    description="Example script to enable a langchain agent", add_help=True
)
parser.add_argument(
    "--model_name",
    default="mistral-7b-instruct-v0.2",
    type=str,
    help="The name of the model to request",
)
parser.add_argument(
    "--streaming_response",
    default=False,
    action="store_true",
    help="Whether to enable streaming response",
)
parser.add_argument(
    "--prompt_template",
    default="hwchase17/openai-tools-agent",
    type=str,
    help="Prompt template for the OpenAI tools agent",
)
parser.add_argument(
    "--max_tokens",
    default=512,
    type=int,
    help="Maximum number of tokens to generate in this example",
)

args = parser.parse_args()
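
# Resolve credentials and endpoint: an OPENAI_API_KEY from the environment is used as-is;
# otherwise a placeholder key is sent and requests go to a local OpenAI-compatible server.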

if "OPENAI_API_KEY" in os.environ:
openai_api_key = os.environ["OPENAI_API_KEY"]
else:
openai_api_key = "not_needed"

if "OPENAI_BASE_URL" in os.environ:
openai_base_url = os.environ["OPENAI_BASE_URL"]
elif openai_api_key == "not_needed":
openai_base_url = "http://localhost:8000/v1"
else:
openai_base_url = "https://api.openai.com/v1"

# ================================================ #
# Let's define a function/tool for getting the weather. In this demo we mock the output.
# In real life, you'd call a weather library/API such as PyOWM (OpenWeatherMap).
# Depending on your app's functionality, you may also call vendor/external or internal custom APIs.

from pydantic import BaseModel, Field
from typing import Optional, Type
from langchain.tools import BaseTool


def get_current_weather(location, unit):
    # Call an external API to get relevant information (like serpapi, etc)
    # Here for the demo we will send a mock response
    weather_info = {
        "location": location,
        "temperature": "78",
        "unit": unit,
        "forecast": ["sunny", "with a chance of rain"],
    }
    return weather_info


class GetCurrentWeatherCheckInput(BaseModel):
    # Check the input for Weather
    location: str = Field(
        ..., description="The name of the location for which we need to find the weather"
    )
    unit: str = Field(..., description="The unit for the temperature value")


class GetCurrentWeatherTool(BaseTool):
    name = "get_current_weather"
    description = "Used to find the weather for a given location in the requested unit"
    args_schema: Optional[Type[BaseModel]] = GetCurrentWeatherCheckInput

    def _run(self, location: str, unit: str):
        weather_response = get_current_weather(location, unit)
        return weather_response

    def _arun(self, location: str, unit: str):
        raise NotImplementedError("This tool does not support async")


# ================================================ #

tools = [GetCurrentWeatherTool()]
prompt = hub.pull(args.prompt_template)
llm = ChatOpenAI(
    openai_api_base=openai_base_url,
    model_name=args.model_name,
    openai_api_key=openai_api_key,
    max_tokens=args.max_tokens,
    callbacks=[
        StreamingStdOutCallbackHandler() if args.streaming_response else StdOutCallbackHandler()
    ],
    streaming=args.streaming_response,
)
agent = create_openai_tools_agent(tools=tools, llm=llm, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor.invoke({"input": "what is the weather today in Boston?"})
agent_executor.invoke({"input": "tell me a short joke?"})
@@ -29,6 +29,8 @@
action="store_true",
help="Whether to enable streaming response",
)
parser.add_argument("--max_tokens", default=256, help="The maximum numbers of tokens to generate")


args = parser.parse_args()

@@ -52,6 +54,7 @@
    model_name=args.model_name,
    openai_api_key=openai_api_key,
    streaming=args.streaming_response,
    max_tokens=args.max_tokens,
)

prompt = PromptTemplate(template="list 3 {things}", input_variables=["things"])
108 changes: 108 additions & 0 deletions examples/inference/api_server_openai/openai_tools_call_query.py
@@ -0,0 +1,108 @@
#
# Copyright 2023 The LLM-on-Ray Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
from openai import OpenAI
import os

parser = argparse.ArgumentParser(
    description="Example script to query with the openai sdk", add_help=True
)
parser.add_argument(
    "--model_name",
    default="mistral-7b-instruct-v0.2",
    type=str,
    help="The name of the model to request",
)
parser.add_argument(
    "--streaming_response",
    default=False,
    action="store_true",
    help="Whether to enable streaming response",
)
parser.add_argument(
    "--max_new_tokens", default=512, type=int, help="The maximum number of tokens to generate"
)
args = parser.parse_args()

if "OPENAI_API_KEY" in os.environ:
openai_api_key = os.environ["OPENAI_API_KEY"]
else:
openai_api_key = "not_needed"

if "OPENAI_BASE_URL" in os.environ:
openai_base_url = os.environ["OPENAI_BASE_URL"]
elif openai_api_key == "not_needed":
openai_base_url = "http://localhost:8000/v1"
else:
openai_base_url = "https://api.openai.com/v1"


client = OpenAI(base_url=openai_base_url, api_key=openai_api_key)

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]
messages = [
    [
        {"role": "user", "content": "You are a helpful assistant"},
        {"role": "user", "content": "What's the weather like in Boston today?"},
    ],
    [
        {"role": "user", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Tell me a short joke?"},
    ],
]
for message in messages:
    print(f"User: {message[1]['content']}")
    print("Assistant:", end=" ", flush=True)
    chat_completion = client.chat.completions.create(
        model=args.model_name,
        messages=message,
        max_tokens=args.max_new_tokens,
        tools=tools,
        tool_choice="auto",
        stream=args.streaming_response,
    )

    if args.streaming_response:
        for chunk in chat_completion:
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
            tool_calls = chunk.choices[0].delta.tool_calls
            if tool_calls is not None:
                print(tool_calls, end="", flush=True)
        print("")
    else:
        print(repr(chat_completion.choices[0].message.model_dump()))
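
The example above stops after printing the assistant's tool call. A minimal sketch of completing the round trip in non-streaming mode, reusing the client and tools defined above, could look like the following; the mock weather lookup and the helper name are illustrative assumptions, and whether the served model accepts a follow-up "tool" role message depends on its prompt template.

import json


def mock_get_current_weather(location, unit="fahrenheit"):
    # Stand-in for a real weather lookup, used only for this illustration
    return {"location": location, "temperature": "78", "unit": unit, "forecast": ["sunny"]}


def run_tool_round_trip(user_content):
    history = [{"role": "user", "content": user_content}]
    first = client.chat.completions.create(
        model=args.model_name, messages=history, tools=tools, tool_choice="auto"
    )
    assistant_message = first.choices[0].message
    if not assistant_message.tool_calls:
        return assistant_message.content

    # Echo the assistant's tool call back, followed by one "tool" message per call
    history.append(assistant_message.model_dump())
    for tool_call in assistant_message.tool_calls:
        tool_args = json.loads(tool_call.function.arguments)
        result = mock_get_current_weather(
            tool_args.get("location"), tool_args.get("unit", "fahrenheit")
        )
        history.append(
            {"role": "tool", "tool_call_id": tool_call.id, "content": json.dumps(result)}
        )

    # Ask the model to turn the tool result into a final natural-language answer
    second = client.chat.completions.create(model=args.model_name, messages=history)
    return second.choices[0].message.content


print(run_tool_round_trip("What's the weather like in Boston today?"))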
(Remaining changed file diffs are not shown here.)
