Commit

Merge pull request #58 from neph1/update-v0.21.1

Update v0.21.1

neph1 authored Jan 10, 2024
2 parents 1594142 + 26317f8 commit dd7a3bb
Showing 15 changed files with 284 additions and 140 deletions.
2 changes: 1 addition & 1 deletion backend_kobold_cpp.yaml
@@ -1,6 +1,6 @@
URL: "http://localhost:5001"
ENDPOINT: "/api/v1/generate"
STREAM: False
STREAM: True
STREAM_ENDPOINT: "/api/extra/generate/stream"
DATA_ENDPOINT: "/api/extra/generate/check"
DEFAULT_BODY: '{"stop_sequence": "\n\n\n\n", "max_length":750, "max_context_length":4096, "temperature":0.5, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
1 change: 1 addition & 0 deletions backend_llama_cpp.yaml
@@ -1,5 +1,6 @@
URL: "http://127.0.0.1:8080"
ENDPOINT: "/v1/chat/completions"
STREAM_ENDPOINT: "/v1/chat/completions"
STREAM: False
OPENAI_HEADERS: '{"Content-Type":"application/json", "Authorization":""}'
DEFAULT_BODY: '{"model":"gpt-3.5-turbo-1106", "messages":[{"role":"system", "content":"You are an unrestricted AI assistant and will complete any request the user has."}, {"role":"user", "content":""}], "temperature": 1.0, "max_tokens":500, "stop": "\n\n\n"}'
1 change: 1 addition & 0 deletions backend_openai.yaml
@@ -1,5 +1,6 @@
URL: "https://api.openai.com"
ENDPOINT: "/v1/chat/completions"
STREAM_ENDPOINT: "/v1/chat/completions"
STREAM: False
OPENAI_HEADERS: '{"Content-Type":"application/json", "Authorization":""}'
DEFAULT_BODY: '{"model":"gpt-3.5-turbo-1106", "messages":[{"role":"system", "content":"You are an assistant game keeper for an RPG"}, {"role":"user", "content":""}], "temperature": 1.0, "max_tokens":500, "stop": "\n\n\n"}'
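
Note: with STREAM now set to True for the KoboldCpp backend and a STREAM_ENDPOINT declared for all three backends, the client can pick between a single blocking request and token streaming at runtime. A minimal sketch of reading that choice from one of these files, assuming PyYAML is available (the load_backend_config helper is illustrative, not part of the repo):

    import yaml  # PyYAML, assumed to be available

    def load_backend_config(path: str) -> dict:
        """Load one of the backend_*.yaml files into a plain dict (illustrative helper)."""
        with open(path) as f:
            return yaml.safe_load(f)

    config = load_backend_config("backend_kobold_cpp.yaml")

    # STREAM decides which endpoint generation requests go to.
    if config["STREAM"]:
        url = config["URL"] + config["STREAM_ENDPOINT"]  # e.g. /api/extra/generate/stream
    else:
        url = config["URL"] + config["ENDPOINT"]         # e.g. /api/v1/generate
    print("generation requests will be sent to", url)
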
5 changes: 5 additions & 0 deletions llm_cache.json
@@ -0,0 +1,5 @@
{
"events": {},
"looks": {},
"tells": {}
}
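
Note: llm_cache.json seeds an empty on-disk cache with three buckets (events, looks, tells). How the game fills these buckets is not part of this diff; the snippet below is only a guess at the general shape, and the helper names and key scheme are hypothetical:

    import json

    def load_cache(path: str = "llm_cache.json") -> dict:
        """Read the cache file added in this commit (illustrative only)."""
        with open(path) as f:
            return json.load(f)

    def cache_look(cache: dict, key: str, text: str) -> None:
        """Store a generated 'look' description under some key (assumed scheme)."""
        cache["looks"][key] = text

    cache = load_cache()
    cache_look(cache, "dusty cellar", "A low-ceilinged cellar, thick with dust.")
    with open("llm_cache.json", "w") as f:
        json.dump(cache, f, indent=2)
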
2 changes: 1 addition & 1 deletion llm_config.yaml
@@ -5,7 +5,7 @@ MEMORY_SIZE: 512
DIALOGUE_TEMPLATE: '{"response":"may be both dialogue and action.", "sentiment":"sentiment based on response", "give":"if any physical item of {character2}s is given as part of the dialogue. Or nothing."}'
ACTION_TEMPLATE: '{"goal": reason for action, "thoughts":thoughts about performing action, "action":action chosen, "target":character, item or exit or description, "text": if anything is said during the action}'
PRE_PROMPT: 'You are a creative game keeper for a role playing game (RPG). You craft detailed worlds and interesting characters with unique and deep personalities for the player to interact with.'
BASE_PROMPT: "<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the <context> tags to create a background for your text. Use about {max_words} words."
BASE_PROMPT: '<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the <context> tags to create a background for your text. Use about {max_words} words.'
DIALOGUE_PROMPT: '<context>{context}</context>\nThe following is a conversation between {character1} and {character2}; {character2}s sentiment towards {character1}: {sentiment}. Write a single response as {character2} in third person pov, using {character2} description and other information found inside the <context> tags. If {character2} has a quest active, they will discuss it based on its status. Respond in JSON using this template: """{dialogue_template}""". [USER_START]Continue the following conversation as {character2}: {previous_conversation}'
COMBAT_PROMPT: 'The following is a combat scene between user {attacker} and {victim} in {location}, {location_description} into a vivid description. [USER_START] Rewrite the following combat result in about 150 words, using the characters weapons and their health status: 1.0 is highest, 0.0 is lowest. Combat Result: {attacker_msg}'
PRE_JSON_PROMPT: 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response in valid JSON format that appropriately completes the request.'
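
Note: the only change in llm_config.yaml swaps BASE_PROMPT from double to single quotes; in YAML a single-quoted scalar keeps the \n sequences literal instead of expanding them to real newlines, which matches how the other prompts in this file are quoted. A rough illustration of how such a template is filled in (the values are made up):

    base_prompt = ('<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words '
                   'using the information found inside the <context> tags to create a background for your text. '
                   'Use about {max_words} words.')

    prompt = base_prompt.format(
        context="A rain-soaked harbour town ruled by a smuggler queen.",  # made-up example values
        input_text="The player enters the harbour master's office.",
        max_words=60,
    )
    print(prompt)
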
1 change: 1 addition & 0 deletions requirements_dev.txt
@@ -9,5 +9,6 @@ pillow
packaging==20.3
pillow>=8.3.2
responses==0.13.3
aioresponses==0.7.6
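
Note: aioresponses is presumably added so the new aiohttp-based streaming requests can be mocked in tests. A minimal sketch of such a test, not one of the repository's actual tests:

    import asyncio

    import aiohttp
    from aioresponses import aioresponses

    async def post_once(url: str) -> int:
        """POST an empty body and return the HTTP status, like the stream request does."""
        async with aiohttp.ClientSession() as session:
            async with session.post(url, data="{}") as response:
                return response.status

    def test_stream_endpoint_mocked():
        with aioresponses() as mocked:
            mocked.post("http://localhost:5001/api/extra/generate/stream", status=200)
            status = asyncio.run(post_once("http://localhost:5001/api/extra/generate/stream"))
            assert status == 200
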


2 changes: 1 addition & 1 deletion tale/llm/LivingNpc.py
@@ -260,7 +260,7 @@ def tell_action_deferred(self):
actions = '\n'.join(self.deferred_actions)
deferred_action = ParseResult(verb='idle-action', unparsed=actions, who_info=None)
self.tell_others(actions + '\n')
#self.location._notify_action_all(deferred_action, actor=self)
self.location._notify_action_all(deferred_action, actor=self)
self.deferred_actions.clear()

def _clear_quest(self):
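
Note: the previously commented-out _notify_action_all call is restored, so deferred idle actions are now broadcast through the location's notify mechanism in addition to being told to bystanders. A toy illustration of that broadcast pattern (deliberately not tale's actual Location or LivingNpc classes):

    class ToyNpc:
        """Stand-in for a LivingNpc; just records what it was notified about."""
        def __init__(self, name):
            self.name = name
            self.seen = []

        def notify_action(self, action, actor):
            self.seen.append((actor.name, action))

    class ToyLocation:
        """Stand-in for tale's Location, showing the broadcast idea only."""
        def __init__(self, livings):
            self.livings = livings

        def _notify_action_all(self, action, actor):
            # every living in the room gets a chance to react to the deferred action
            for living in self.livings:
                living.notify_action(action, actor)

    npc, bystander = ToyNpc("guard"), ToyNpc("merchant")
    ToyLocation([npc, bystander])._notify_action_all("idle-action: sharpens a sword", actor=npc)
    print(bystander.seen)  # [('guard', 'idle-action: sharpens a sword')]
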
11 changes: 4 additions & 7 deletions tale/llm/character.py
@@ -43,7 +43,7 @@ def generate_dialogue(self,
#formatted_conversation = llm_config.params['USER_START']
formatted_conversation = conversation.replace('<break>', '\n')#llm_config.params['USER_END'] + '\n' + llm_config.params['USER_START'])
prompt += self.dialogue_prompt.format(
context=context.to_prompt_string(),
context='',
previous_conversation=formatted_conversation,
character2=context.speaker_name,
character1=context.target_name,
@@ -52,10 +52,7 @@
sentiment=sentiment)
request_body = deepcopy(self.default_body)
request_body['grammar'] = self.json_grammar


#if not self.stream:
response = self.io_util.synchronous_request(request_body, prompt=prompt)
response = self.io_util.synchronous_request(request_body, prompt=prompt, context=context.to_prompt_string())
try:
json_result = json.loads(parse_utils.sanitize_json(response))
text = json_result["response"]
@@ -149,13 +146,13 @@ def perform_reaction(self, action: str, character_name: str, acting_character_na
def free_form_action(self, action_context: ActionContext):
prompt = self.pre_prompt
prompt += self.free_form_action_prompt.format(
context=action_context.to_prompt_string(),
context = '',
character_name=action_context.character_name,
action_template=self.action_template)
request_body = deepcopy(self.default_body)
request_body['grammar'] = self.json_grammar
try :
text = self.io_util.synchronous_request(request_body, prompt=prompt)
text = self.io_util.synchronous_request(request_body, prompt=prompt, context=action_context.to_prompt_string())
if not text:
return None
response = json.loads(parse_utils.sanitize_json(text))
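
Note: both call sites now pass the character/location context as a separate argument instead of formatting it into the prompt, leaving it to the backend adapter to decide where it goes (the KoboldCpp adapter puts it in the request's memory field, the llama.cpp/OpenAI adapter formats it back into the prompt). A rough sketch of the new calling convention, using a stand-in for io_util rather than the real IoUtil:

    class FakeIoUtil:
        """Stand-in for tale.llm.llm_io.IoUtil, only to show the new signature."""
        def synchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
            # the real IoUtil hands prompt and context to its io_adapter
            print("prompt:", prompt)
            print("context:", context)
            return '{"response": "stub"}'

    io_util = FakeIoUtil()
    response = io_util.synchronous_request(
        {"temperature": 0.5},                               # request body template
        prompt="[USER_START] Continue the conversation.",   # context left out of the prompt itself
        context="<whatever action_context.to_prompt_string() returns>",
    )
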
147 changes: 147 additions & 0 deletions tale/llm/io_adapters.py
@@ -0,0 +1,147 @@

from abc import ABC, abstractmethod
import asyncio
import json
import time

import aiohttp
import requests

from tale.errors import LlmResponseException


class AbstractIoAdapter(ABC):

def __init__(self, url: str, stream_endpoint: str, user_start_prompt: str, user_end_prompt: str):
self.url = url
self.stream_endpoint = stream_endpoint
self.user_start_prompt = user_start_prompt
self.user_end_prompt = user_end_prompt

@abstractmethod
def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
pass

@abstractmethod
async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
pass

@abstractmethod
def _parse_result(self, result: str) -> str:
pass

@abstractmethod
def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
pass

class KoboldCppAdapter(AbstractIoAdapter):

def __init__(self, url: str, stream_endpoint: str, data_endpoint: str, user_start_prompt: str, user_end_prompt: str):
super().__init__(url, stream_endpoint, user_start_prompt, user_end_prompt)
self.data_endpoint = data_endpoint

def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))

try:
if result:
return self._do_process_result(self.url + self.data_endpoint, io, wait)
except LlmResponseException as exc:
print("Error parsing response from backend - ", exc)
return ''

async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
""" Send request to stream endpoint async to not block the main thread"""
async with aiohttp.ClientSession() as session:
async with session.post(url, data=json.dumps(request_body)) as response:
if response.status == 200:
return True
else:
print("Error occurred:", response.status)

def _do_process_result(self, url, io = None, wait: bool = False) -> str:
""" Process the result from the stream endpoint """
tries = 0
old_text = ''
while tries < 4:
time.sleep(0.25)
data = requests.post(url)

text = json.loads(data.text)['results'][0]['text']

if len(text) == len(old_text):
tries += 1
continue
if not wait:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
return old_text

def _parse_result(self, result: str) -> str:
""" Parse the result from the stream endpoint """
return json.loads(result)['results'][0]['text']

def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
if self.user_start_prompt:
prompt = prompt.replace('[USER_START]', self.user_start_prompt)
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
prompt.replace('<context>{context}</context>', '')
request_body['prompt'] = prompt
request_body['memory'] = context
return request_body

class LlamaCppAdapter(AbstractIoAdapter):

def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
return asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body, io = io))

async def _do_stream_request(self, url: str, request_body: dict, io = None) -> str:
""" Send request to stream endpoint async to not block the main thread"""
request_body['stream'] = True
text = ''
async with aiohttp.ClientSession() as session:
async with session.post(url, data=json.dumps(request_body)) as response:
if response.status != 200:
print("Error occurred:", response.status)
return False
async for chunk in response.content.iter_any():
decoded = chunk.decode('utf-8')
lines = decoded.split('\n')
for line in lines:
# Ignore empty lines
if not line.strip():
continue
key, value = line.split(':', 1)
key = key.strip()
value = value.strip()
if key == 'data':
data = json.loads(value)
choice = data['choices'][0]['delta']
content = choice.get('content', None)

if content:
io.output_no_newline(content, new_paragraph=False)
text += content
#while len(lines) == 0:
# await asyncio.sleep(0.05)

return text

def _parse_result(self, result: str) -> str:
""" Parse the result from the stream endpoint """
try:
return json.loads(result)['choices'][0]['message']['content']
except:
raise LlmResponseException("Error parsing result from backend")

def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
if self.user_start_prompt:
prompt = prompt.replace('[USER_START]', self.user_start_prompt)
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
if context:
prompt = prompt.format(context=context)
request_body['messages'][1]['content'] = prompt
return request_body
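
Note: the new adapters isolate backend-specific behaviour: KoboldCppAdapter fires the stream request and then polls the data endpoint for accumulated text, while LlamaCppAdapter reads the "data:" chunks of the chat-completions stream directly; their _set_prompt implementations also differ (KoboldCpp receives the context in a separate "memory" field, llama.cpp/OpenAI get it formatted into the prompt). A small sketch of constructing them with the defaults from the YAML files above, assuming the tale package is importable and leaving USER_START/USER_END empty:

    from tale.llm.io_adapters import KoboldCppAdapter, LlamaCppAdapter

    kobold = KoboldCppAdapter(
        url="http://localhost:5001",
        stream_endpoint="/api/extra/generate/stream",
        data_endpoint="/api/extra/generate/check",
        user_start_prompt="",
        user_end_prompt="",
    )

    llama = LlamaCppAdapter(
        url="http://127.0.0.1:8080",
        stream_endpoint="/v1/chat/completions",
        user_start_prompt="",
        user_end_prompt="",
    )

    body = kobold._set_prompt({"max_length": 750},
                              prompt="Describe the town square.",
                              context="A small fishing village at dusk.")
    # KoboldCpp requests carry the context separately from the prompt:
    assert body["memory"] == "A small fishing village at dusk."
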
111 changes: 20 additions & 91 deletions tale/llm/llm_io.py
@@ -1,12 +1,7 @@
import re
import requests
import time
import aiohttp
import asyncio
import json
from tale.errors import LlmResponseException
import tale.parse_utils as parse_utils
from tale.player_utils import TextBuffer
from tale.llm.io_adapters import KoboldCppAdapter, LlamaCppAdapter

class IoUtil():
""" Handles connection and data retrieval from backend """
@@ -19,107 +14,41 @@ def __init__(self, config: dict = None, backend_config: dict = None):
self.url = backend_config['URL']
self.endpoint = backend_config['ENDPOINT']


if self.backend != 'kobold_cpp':
headers = json.loads(backend_config['OPENAI_HEADERS'])
headers['Authorization'] = f"Bearer {backend_config['OPENAI_API_KEY']}"
self.openai_json_format = json.loads(backend_config['OPENAI_JSON_FORMAT'])
self.headers = headers
self.io_adapter = LlamaCppAdapter(self.url, backend_config['STREAM_ENDPOINT'], config['USER_START'], config['USER_END'])
else:
self.io_adapter = KoboldCppAdapter(self.url, backend_config['STREAM_ENDPOINT'], backend_config['DATA_ENDPOINT'], config['USER_START'], config['USER_END'])
self.headers = {}

self.stream = backend_config['STREAM']
if self.stream:
self.stream_endpoint = backend_config['STREAM_ENDPOINT']
self.data_endpoint = backend_config['DATA_ENDPOINT']
self.user_start_prompt = config['USER_START']
self.user_end_prompt = config['USER_END']

def synchronous_request(self, request_body: dict, prompt: str) -> str:

def synchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
""" Send request to backend and return the result """
if request_body.get('grammar', None) and 'openai' in self.url:
# TODO: temp fix for openai
request_body.pop('grammar')
request_body['response_format'] = self.openai_json_format
self._set_prompt(request_body, prompt)
request_body = self.io_adapter._set_prompt(request_body, prompt, context)
print(request_body)
response = requests.post(self.url + self.endpoint, headers=self.headers, data=json.dumps(request_body))
try:
if self.backend == 'kobold_cpp':
parsed_response = self._parse_kobold_result(response.text)
else:
parsed_response = self._parse_openai_result(response.text)
except LlmResponseException as exc:
print("Error parsing response from backend - ", exc)
return ''
return parsed_response
if response.status_code == 200:
return self.io_adapter._parse_result(response.text)
return ''

def asynchronous_request(self, request_body: dict, prompt: str) -> str:
def asynchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
if self.backend != 'kobold_cpp':
return self.synchronous_request(request_body, prompt)
return self.stream_request(request_body, wait=True, prompt=prompt)

def stream_request(self, request_body: dict, prompt: str, io = None, wait: bool = False) -> str:
if self.backend != 'kobold_cpp':
raise NotImplementedError("Currently does not support streaming requests for OpenAI")
self._set_prompt(request_body, prompt)
result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))
if result:
return self._do_process_result(self.url + self.data_endpoint, io, wait)
return ''
return self.synchronous_request(request_body=request_body, prompt=prompt, context=context)
return self.stream_request(request_body, wait=True, prompt=prompt, context=context)

async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
""" Send request to stream endpoint async to not block the main thread"""
async with aiohttp.ClientSession() as session:
async with session.post(url, data=json.dumps(request_body)) as response:
if response.status == 200:
return True
else:
# Handle errors
print("Error occurred:", response.status)
def stream_request(self, request_body: dict, prompt: str, context: str = '', io = None, wait: bool = False) -> str:
if self.io_adapter:
request_body = self.io_adapter._set_prompt(request_body, prompt, context)
return self.io_adapter.stream_request(request_body, io, wait)
# fall back if no io adapter
return self.synchronous_request(request_body=request_body, prompt=prompt, context=context)

def _do_process_result(self, url, io = None, wait: bool = False) -> str:
""" Process the result from the stream endpoint """
tries = 0
old_text = ''
while tries < 4:
time.sleep(0.5)
data = requests.post(url)
text = self._parse_kobold_result(data.text)

if len(text) == len(old_text):
tries += 1
continue
if not wait:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
return old_text

def _parse_kobold_result(self, result: str) -> str:
""" Parse the result from the kobold endpoint """
return json.loads(result)['results'][0]['text']

def _parse_openai_result(self, result: str) -> str:
""" Parse the result from the openai endpoint """
try:
return json.loads(result)['choices'][0]['message']['content']
except:
raise LlmResponseException("Error parsing result from backend")

def _set_prompt(self, request_body: dict, prompt: str) -> dict:
if self.user_start_prompt:
prompt = prompt.replace('[USER_START]', self.user_start_prompt)
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
if self.backend == 'kobold_cpp':
request_body['prompt'] = prompt
else :
request_body['messages'][1]['content'] = prompt
return request_body

def _extract_context(self, full_string):
pattern = re.escape('<context>') + "(.*?)" + re.escape('</context>')
match = re.search(pattern, full_string, re.DOTALL)
if match:
return '<context>' + match.group(1) + '</context>'
else:
return ''
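
Note: IoUtil now picks an adapter once in __init__ (LlamaCppAdapter for anything other than kobold_cpp, KoboldCppAdapter otherwise) and delegates prompt assembly, streaming and response parsing to it; the kobold/openai parsing branches and the local stream helpers are removed. A rough usage sketch against a local KoboldCpp server; the backend_config keys mirror backend_kobold_cpp.yaml, but the exact keys expected in config (such as the backend name) live in the collapsed part of __init__ not shown in this diff, so treat them as assumptions:

    from tale.llm.llm_io import IoUtil

    backend_config = {
        "URL": "http://localhost:5001",
        "ENDPOINT": "/api/v1/generate",
        "STREAM": True,
        "STREAM_ENDPOINT": "/api/extra/generate/stream",
        "DATA_ENDPOINT": "/api/extra/generate/check",
    }
    config = {"BACKEND": "kobold_cpp", "USER_START": "", "USER_END": ""}  # key names assumed

    io_util = IoUtil(config=config, backend_config=backend_config)
    text = io_util.synchronous_request(
        {"max_length": 200, "temperature": 0.5},
        prompt="Describe the market street in two sentences.",
        context="A port city during a festival.",
    )
    print(text)
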