From 45974e1c1b616500d1d382201ece9f3db6e07f01 Mon Sep 17 00:00:00 2001
From: rickard
Date: Sat, 6 Jan 2024 19:35:06 +0100
Subject: [PATCH 1/5] new context handling
---
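Notes: prompt data is now collected into context objects (BaseContext and its
subclasses) that serialize themselves via to_prompt_string(), and the result is
substituted into the single {context} placeholder of the prompts in
llm_config.yaml. A minimal sketch of the intended flow, with an abbreviated
prompt and illustrative story text (EvokeContext is from this patch; the
<context> wrapping follows the patched prompts):

    from tale.llm.contexts.EvokeContext import EvokeContext

    context = EvokeContext(story_context='A mad tea party', history='')
    prompt = '<context>{context}</context>\n[USER_START] Rewrite [{input_text}] ...'
    print(prompt.format(context=context.to_prompt_string(),
                        input_text='You see a table set for tea.'))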
backend_kobold_cpp.yaml | 3 +-
llm_config.yaml | 18 +++++-----
tale/llm/contexts/ActionContext.py | 30 +++++++++++++++++
tale/llm/contexts/BaseContext.py | 9 +++++
tale/llm/contexts/DialogueContext.py | 24 ++++++++++++++
tale/llm/contexts/EvokeContext.py | 10 ++++++
tale/llm/llm_utils.py | 49 ++++++++++++++++------------
tests/test_llm_utils.py | 4 +--
8 files changed, 114 insertions(+), 33 deletions(-)
create mode 100644 tale/llm/contexts/ActionContext.py
create mode 100644 tale/llm/contexts/BaseContext.py
create mode 100644 tale/llm/contexts/DialogueContext.py
create mode 100644 tale/llm/contexts/EvokeContext.py
diff --git a/backend_kobold_cpp.yaml b/backend_kobold_cpp.yaml
index 1ab12caa..e94ab928 100644
--- a/backend_kobold_cpp.yaml
+++ b/backend_kobold_cpp.yaml
@@ -1,8 +1,7 @@
URL: "http://localhost:5001"
ENDPOINT: "/api/v1/generate"
-STREAM: False
+STREAM: True
STREAM_ENDPOINT: "/api/extra/generate/stream"
DATA_ENDPOINT: "/api/extra/generate/check"
DEFAULT_BODY: '{"stop_sequence": "\n\n\n\n", "max_length":750, "max_context_length":4096, "temperature":0.5, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
-ANALYSIS_BODY: '{}'
GENERATION_BODY: '{"stop_sequence": "\n\n\n\n", "max_length":750, "max_context_length":4096, "temperature":1.0, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
\ No newline at end of file
diff --git a/llm_config.yaml b/llm_config.yaml
index 79eb63e6..3a5abe14 100644
--- a/llm_config.yaml
+++ b/llm_config.yaml
@@ -1,12 +1,12 @@
-WORD_LIMIT: 200
+WORD_LIMIT: 200 # max number of words the model is encouraged to generate. not a hard limit
+SHORT_WORD_LIMIT: 25 # max number of words when asked to write something short. not a hard limit
BACKEND: "kobold_cpp" # valid options: "openai", "llama_cpp", "kobold_cpp"
-ANALYSIS_BODY: '{}'
MEMORY_SIZE: 512
-PRE_PROMPT: 'You are a creative game keeper. You craft detailed worlds and interesting characters with unique and deep personalities for the player to interact with.'
-BASE_PROMPT: "[Story context: {story_context}]; [History: {history}]; [USER_START] Rewrite [{input_text}] in your own words using the supplied Context and History to create a background for your text. Use about {max_words} words."
-ACTION_PROMPT: "[Story context: {story_context}]; [History: {history}]; [USER_START] Rewrite the Action, and nothing else, in your own words using the supplied Context and Location. History is what happened before. Use less than {max_words} words. [Action: {input_text}]."
-DIALOGUE_PROMPT: 'Story context: {story_context}; Location: {location}; The following is a conversation between {character1} and {character2}; {character1}:{character1_description}; {character2}:{character2_description}; {character2}s sentiment towards {character1}: {sentiment}. Write a single response as {character2} in third person pov, using {character2} description. If {character2} has a quest active, they will discuss it based on its status. Respond in JSON using this template: {{"response":"may be both dialogue and action.", "sentiment":"sentiment based on response", "give":"if any physical item of {character2}s is given as part of the dialogue. Or nothing."}}. [USER_START]Continue the following conversation as {character2}: {previous_conversation}'
-ITEM_PROMPT: 'Items:[{items}];Characters:[{character1},{character2}] \n\n [USER_START] Decide if an item was explicitly given, taken, dropped or put somewhere in the following text:[Text:{text}]. Insert your thoughts about whether an item was explicitly given, taken, put somewhere or dropped in "my thoughts", and the results in "item", "from" and "to", or make them empty if . Insert {character1}s sentiment towards {character2} in a single word in "sentiment assessment". Fill in the following JSON template: {{ "thoughts":"", "result": {{ "item":"", "from":"", "to":""}}, {{"sentiment":"sentiment assessment"}} }} End of example. \n\n Write your response in valid JSON.'
+DIALOGUE_TEMPLATE: '{"response":"may be both dialogue and action.", "sentiment":"sentiment based on response", "give":"if any physical item of {character2}s is given as part of the dialogue. Or nothing."}'
+ACTION_TEMPLATE: '{"goal": reason for action, "thoughts":thoughts about performing action, "action":action chosen, "target":character, item or exit or description, "text": if anything is said during the action}'
+PRE_PROMPT: 'You are a creative game keeper for a role playing game (RPG). You craft detailed worlds and interesting characters with unique and deep personalities for the player to interact with.'
+BASE_PROMPT: "<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the tags to create a background for your text. Use about {max_words} words."
+DIALOGUE_PROMPT: '<context>{context}</context>\nThe following is a conversation between {character1} and {character2}; {character2}s sentiment towards {character1}: {sentiment}. Write a single response as {character2} in third person pov, using {character2} description and other information found inside the tags. If {character2} has a quest active, they will discuss it based on its status. Respond in JSON using this template: """{dialogue_template}""". [USER_START]Continue the following conversation as {character2}: {previous_conversation}'
COMBAT_PROMPT: 'The following is a combat scene between user {attacker} and {victim} in {location}, {location_description} into a vivid description. [USER_START] Rewrite the following combat result in about 150 words, using the characters weapons and their health status: 1.0 is highest, 0.0 is lowest. Combat Result: {attacker_msg}'
PRE_JSON_PROMPT: 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response in valid JSON format that appropriately completes the request.'
CREATE_CHARACTER_PROMPT: 'Story context: {story_context}; World info: {world_info};[USER_START] For a {story_type}, create a diverse character with rich personality that can be interacted with using the story context and keywords. {{quest_prompt}} Do not mention height. Story context: {story_context}; keywords: {keywords}. Fill in this JSON template and write nothing else: {{"name":"", "description": "50 words", "appearance": "25 words", "personality": "50 words", "money":(int), "level":"", "gender":"m/f/n", "age":(int), "race":""}}'
@@ -28,6 +28,6 @@ PLAYER_ENTER_PROMPT: 'Story context: {story_context}; World info: {world_info};
QUEST_PROMPT: '[Story context: {story_context}]; World info: {world_info}; Zone info: {zone_info}; Character: {character_card}; [USER_START] In an RPG described as {story_type}, {character_name} needs someone to perform a task. Based on the following input, come up with a suitable reason for it, using {character_name}s personality and history. Task info: {base_quest}. Fill in this JSON template and do not write anything else: {{"reason":""}} \n\n '
NOTE_QUEST_PROMPT: '[Story context: {story_context}]; World info: {world_info}; Zone info: {zone_info}; [USER_START]For an RPG described as {story_type}, generate a quest that starts from reading a note. The reader must find and talk to a person. Fill in the following JSON template and write nothing else.: {{"reason": "only what the note says. 50 words.", "type":"talk", "target":"who to talk to", "location":"", "name":"name of quest"}}'
NOTE_LORE_PROMPT: '[Story context: {story_context}]; World info: {world_info}; Zone info: {zone_info}; [USER_START]For an RPG described as {story_type}, Decide what is written on a note that has been found. Use the provided story and world information to generate a piece of lore. Use about 50 words.'
-ACTION_PROMPT: 'Story context: {story_context};{location} \nAct as as {character_name} in a {story_type} RPG and perform an action.{character} {actions}. {location_items}. {characters} {exits}.\nPick an action according to {character_name}s description and mood. If suitable, select something to perform the action on (target). Make sure the action is from the list and is related to {character_name}s thoughts.\n Respond using JSON in the following format: """{{"goal": reason for action, "thoughts":thoughts about performing action, "action":action chosen, "target":character, item or exit or description, "text": if anything is said during the action}}"""{history}'
+ACTION_PROMPT: '<context>{context}</context>\nAct as {character_name}.\nUsing the information supplied inside the tags, pick an action according to {character_name}s description and mood. If suitable, select something to perform the action on (target). The action should be in the supplied list and should be related to {character_name}s thoughts.\n Respond using JSON in the following format: """{action_template}"""'
USER_START: '### Instruction:'
-USER_END: '### Response:'
\ No newline at end of file
+USER_END: '### Response:\n'
\ No newline at end of file
diff --git a/tale/llm/contexts/ActionContext.py b/tale/llm/contexts/ActionContext.py
new file mode 100644
index 00000000..5c64d7ef
--- /dev/null
+++ b/tale/llm/contexts/ActionContext.py
@@ -0,0 +1,30 @@
+
+
+import json
+from tale.base import Location
+from tale.llm.contexts.BaseContext import BaseContext
+
+
+class ActionContext(BaseContext):
+
+ def __init__(self, story_context: str, story_type: str, character_name: str, character_card: str, event_history: str, location: Location):
+ super().__init__(story_context)
+ self.story_type = story_type
+ self.character_name = character_name
+ self.character_card = character_card
+ self.event_history = event_history
+ self.location = location
+
+
+ def to_prompt_string(self) -> str:
+ actions = ', '.join(['move', 'say', 'attack', 'wear', 'remove', 'wield', 'take', 'eat', 'drink', 'emote'])
+ characters = {}
+ for living in self.location.livings:
+ if living.visible and living.name != self.character_name.lower():
+ if living.alive:
+ characters[living.name] = living.short_description
+ else:
+ characters[living.name] = f"{living.short_description} (dead)"
+ exits = self.location.exits.keys()
+ items = [item.name for item in self.location.items if item.visible]
+ return f"Story context:{self.story_context}; Story type:{self.story_type}; Available actions: {actions}; Location:{self.location.name}, {self.location.description}; Available exits: {exits}; Self: {self.character_card}; Present items: {items}; Present characters: {json.dumps(characters)}; History:{self.event_history};"
\ No newline at end of file
diff --git a/tale/llm/contexts/BaseContext.py b/tale/llm/contexts/BaseContext.py
new file mode 100644
index 00000000..30ad07d1
--- /dev/null
+++ b/tale/llm/contexts/BaseContext.py
@@ -0,0 +1,9 @@
+
+
+class BaseContext():
+
+ def __init__(self, story_context: str) -> None:
+ self.story_context = story_context
+
+ def to_prompt_string(self) -> str:
+ pass
\ No newline at end of file
diff --git a/tale/llm/contexts/DialogueContext.py b/tale/llm/contexts/DialogueContext.py
new file mode 100644
index 00000000..4e61d08a
--- /dev/null
+++ b/tale/llm/contexts/DialogueContext.py
@@ -0,0 +1,24 @@
+
+
+from tale.llm.contexts.BaseContext import BaseContext
+
+
+class DialogueContext(BaseContext):
+
+ def __init__(self,
+ story_context: str,
+ location_description: str,
+ speaker_card: str,
+ speaker_name: str,
+ target_name: str,
+ target_description: str):
+ super().__init__(story_context)
+ self.location_description = location_description
+ self.speaker_card = speaker_card
+ self.speaker_name = speaker_name
+ self.target_name = target_name
+ self.target_description = target_description
+
+
+ def to_prompt_string(self) -> str:
+ return f"Story context:{self.story_context}; Location:{self.location_description}; Self:{self.speaker_name}:{self.speaker_card}; Listener:{self.target_name}:{self.target_description};"
\ No newline at end of file
diff --git a/tale/llm/contexts/EvokeContext.py b/tale/llm/contexts/EvokeContext.py
new file mode 100644
index 00000000..86997aac
--- /dev/null
+++ b/tale/llm/contexts/EvokeContext.py
@@ -0,0 +1,10 @@
+from tale.llm.contexts.BaseContext import BaseContext
+
+class EvokeContext(BaseContext):
+
+ def __init__(self, story_context: str, history: str) -> None:
+ super().__init__(story_context)
+ self.history = history
+
+ def to_prompt_string(self) -> str:
+ return f"Story context:{self.story_context}; History:{self.history};"
\ No newline at end of file
diff --git a/tale/llm/llm_utils.py b/tale/llm/llm_utils.py
index 1c004dd9..dcfd5cc6 100644
--- a/tale/llm/llm_utils.py
+++ b/tale/llm/llm_utils.py
@@ -6,8 +6,11 @@
from tale.base import Location
from tale.image_gen.base_gen import ImageGeneratorBase
from tale.llm.character import CharacterBuilding
+from tale.llm.contexts.ActionContext import ActionContext
+from tale.llm.contexts.EvokeContext import EvokeContext
from tale.llm.llm_ext import DynamicStory
from tale.llm.llm_io import IoUtil
+from tale.llm.contexts.DialogueContext import DialogueContext
from tale.llm.quest_building import QuestBuilding
from tale.llm.story_building import StoryBuilding
from tale.llm.world_building import WorldBuilding
@@ -37,9 +40,10 @@ def __init__(self, io_util: IoUtil = None):
self.default_body = json.loads(backend_config['DEFAULT_BODY'])
self.memory_size = config_file['MEMORY_SIZE']
self.pre_prompt = config_file['PRE_PROMPT'] # type: str
- self.base_prompt = config_file['BASE_PROMPT'] # type: str
+ self.evoke_prompt = config_file['BASE_PROMPT'] # type: str
self.combat_prompt = config_file['COMBAT_PROMPT'] # type: str
self.word_limit = config_file['WORD_LIMIT']
+ self.short_word_limit = config_file['SHORT_WORD_LIMIT']
self.story_background_prompt = config_file['STORY_BACKGROUND_PROMPT'] # type: str
self.json_grammar = config_file['JSON_GRAMMAR'] # type: str
self.__story = None # type: DynamicStory
@@ -83,13 +87,11 @@ def evoke(self, player_io: TextBuffer, message: str, short_len : bool=False, rol
return output_template.format(message=message, text=cached_look), rolling_prompt
trimmed_message = parse_utils.remove_special_chars(str(message))
-
- amount = 25
+ context = EvokeContext(story_context=self.__story_context, history=rolling_prompt if not skip_history or alt_prompt else '')
prompt = self.pre_prompt
- prompt += alt_prompt or (self.base_prompt.format(
- story_context=self.__story_context,
- history=rolling_prompt if not skip_history or alt_prompt else '',
- max_words=self.word_limit if not short_len else amount,
+ prompt += alt_prompt or (self.evoke_prompt.format(
+ context=context.to_prompt_string(),
+ max_words=self.word_limit if not short_len else self.short_word_limit,
input_text=str(trimmed_message)))
request_body = deepcopy(self.default_body)
@@ -98,9 +100,9 @@ def evoke(self, player_io: TextBuffer, message: str, short_len : bool=False, rol
text = self.io_util.synchronous_request(request_body, prompt=prompt)
llm_cache.cache_look(text, text_hash_value)
return output_template.format(message=message, text=text), rolling_prompt
-
+ text = self.io_util.stream_request(request_body=request_body, player_io=player_io, prompt=prompt, io=self.connection)
player_io.print(output_template.format(message=message, text=text), end=False, format=True, line_breaks=False)
- text = self.io_util.stream_request(request_body, player_io, self.connection, prompt=prompt)
+
llm_cache.cache_look(text, text_hash_value)
return '\n', rolling_prompt
@@ -114,16 +116,17 @@ def generate_dialogue(self, conversation: str,
location_description = '',
event_history='',
short_len : bool=False):
- return self._character.generate_dialogue(conversation,
- character_card=character_card,
- character_name=character_name,
- target=target,
- target_description=target_description,
- sentiment=sentiment,
- location_description=location_description,
- story_context=self.__story_context,
- event_history=event_history,
- short_len=short_len)
+ dialogue_context = DialogueContext(story_context=self.__story_context,
+ location_description=location_description,
+ speaker_card=character_card,
+ speaker_name=character_name,
+ target_name=target,
+ target_description=target_description)
+ return self._character.generate_dialogue(context=dialogue_context,
+ conversation=conversation,
+ sentiment=sentiment,
+ event_history=event_history,
+ short_len=short_len)
def update_memory(self, rolling_prompt: str, response_text: str):
""" Keeps a history of the last couple of events"""
@@ -234,7 +237,13 @@ def generate_image(self, character_name: str, character_appearance: dict = '', s
return result
def free_form_action(self, location: Location, character_name: str, character_card: str = '', event_history: str = ''):
- return self._character.free_form_action(self.__story_context, self.__story_type, location, character_name, character_card, event_history)
+ action_context = ActionContext(story_context=self.__story_context,
+ story_type=self.__story_type,
+ character_name=character_name,
+ character_card=character_card,
+ event_history=event_history,
+ location=location)
+ return self._character.free_form_action(action_context)
def set_story(self, story: DynamicStory):
diff --git a/tests/test_llm_utils.py b/tests/test_llm_utils.py
index c160cb46..ff58a44e 100644
--- a/tests/test_llm_utils.py
+++ b/tests/test_llm_utils.py
@@ -65,7 +65,7 @@ def test_perform_travel_action(self):
def test_generate_dialogue(self):
# mostly testing that prompt works
self.llm_util._character.io_util.response = ['{"response":"Hello there", "sentiment":"cheerful", "give":"ale"}']
- result, item, sentiment = self.llm_util._character.generate_dialogue(conversation='test conversation',
+ result, item, sentiment = self.llm_util.generate_dialogue(conversation='test conversation',
character_card='{}',
character_name='Norhardt',
target='Arto',
@@ -80,7 +80,7 @@ def test_generate_dialogue(self):
def test_generate_dialogue_json(self):
# mostly testing that prompt works
self.llm_util._character.io_util.response = ["{\n \"response\": \"Autumn greets Test character with a warm smile, her golden hair shining in the sunlight. She returns the greeting, her voice filled with kindness, \'Hello there, how can I assist you today?\'\"\n}"]
- result, item, sentiment = self.llm_util._character.generate_dialogue(conversation='test conversation',
+ result, item, sentiment = self.llm_util.generate_dialogue(conversation='test conversation',
character_card='{}',
character_name='Norhardt',
target='Arto',
From c1b2c9a1eec0aa1eaab4836b4c897360890c9e7f Mon Sep 17 00:00:00 2001
From: rickard
Date: Sat, 6 Jan 2024 21:16:00 +0100
Subject: [PATCH 2/5] streaming descriptions are back
---
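Notes: streamed text is now pushed through the io connection's
output_no_newline() instead of being buffered in the player's TextBuffer, and
output_no_newline() gains a new_paragraph flag so successive fragments append
to the open paragraph rather than each opening a new <p> in the browser io.
Illustrative use of the new flag (fragments are hypothetical):

    io.output_no_newline("The hall ", new_paragraph=False)    # appended inline
    io.output_no_newline("stretches on.", new_paragraph=False)
    io.output_no_newline("Done.")  # default new_paragraph=True: own paragraph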
stories/teaparty/story_config.json | 2 +-
stories/teaparty/world.json | 93 ++++++++++++++++++++++++++----
tale/llm/character.py | 51 +++++-----------
tale/llm/llm_io.py | 24 +++++---
tale/llm/llm_utils.py | 4 +-
tale/player.py | 4 +-
tale/tio/console_io.py | 4 +-
tale/tio/if_browser_io.py | 9 ++-
tale/tio/iobase.py | 2 +-
tale/tio/tkinter_io.py | 4 +-
tests/test_player.py | 7 +++
11 files changed, 137 insertions(+), 67 deletions(-)
diff --git a/stories/teaparty/story_config.json b/stories/teaparty/story_config.json
index 995e93aa..054b7414 100644
--- a/stories/teaparty/story_config.json
+++ b/stories/teaparty/story_config.json
@@ -27,5 +27,5 @@
"type": "A whimsical and humoristic tale of tea and madness. Guests are so busy with their own problems that it's difficult to make yourself heard.",
"world_info": "",
"world_mood": 0,
- "custom_resources" : "True"
+ "custom_resources": true
}
diff --git a/stories/teaparty/world.json b/stories/teaparty/world.json
index 57e15343..edcc7cdc 100644
--- a/stories/teaparty/world.json
+++ b/stories/teaparty/world.json
@@ -46,9 +46,9 @@
"personality": "",
"occupation": "",
"age": 0,
- "type": "Npc",
+ "type": "Mob",
"race": "",
- "gender": "m",
+ "gender": "male",
"level": 1,
"stats": {
"ac": 0,
@@ -65,7 +65,16 @@
"strength": 3,
"dexterity": 3,
"unarmed_attack": "FISTS"
- }
+ },
+ "memory": {
+ "known_locations": {},
+ "observed_events": [],
+ "conversations": [],
+ "sentiments": {},
+ "action_history": [],
+ "planned_actions": [],
+ "goal": null
+ }
},
"duchess": {
"location": "Living room",
@@ -79,9 +88,9 @@
"personality": "",
"occupation": "",
"age": 0,
- "type": "Npc",
+ "type": "Mob",
"race": "",
- "gender": "f",
+ "gender": "female",
"level": 1,
"stats": {
"ac": 0,
@@ -98,7 +107,16 @@
"strength": 3,
"dexterity": 3,
"unarmed_attack": "FISTS"
- }
+ },
+ "memory": {
+ "known_locations": {},
+ "observed_events": [],
+ "conversations": [],
+ "sentiments": {},
+ "action_history": [],
+ "planned_actions": [],
+ "goal": null
+ }
},
"ace of spades": {
"location": "Living room",
@@ -112,9 +130,9 @@
"personality": "",
"occupation": "",
"age": 0,
- "type": "Npc",
+ "type": "Mob",
"race": "",
- "gender": "m",
+ "gender": "male",
"level": 1,
"stats": {
"ac": 0,
@@ -131,9 +149,64 @@
"strength": 3,
"dexterity": 3,
"unarmed_attack": "FISTS"
- }
+ },
+ "memory": {
+ "known_locations": {},
+ "observed_events": [],
+ "conversations": [],
+ "sentiments": {},
+ "action_history": [],
+ "planned_actions": [],
+ "goal": null
+ }
}
},
"items": {}
+ },
+ "catalogue": {
+ "items": [
+ {
+ "name": "dagger",
+ "title": "Dagger",
+ "descr": "",
+ "short_descr": "A steel dagger",
+ "value": 0,
+ "rent": 0.0,
+ "weight": 0.0,
+ "takeable": true,
+ "location": "",
+ "wc": 0,
+ "base_damage": 1,
+ "bonus_damage": 0,
+ "weapon_type": "ONE_HANDED"
+ },
+ {
+ "name": "club",
+ "title": "Club",
+ "descr": "",
+ "short_descr": "A wooden club",
+ "value": 0,
+ "rent": 0.0,
+ "weight": 0.0,
+ "takeable": true,
+ "location": "",
+ "wc": 0,
+ "base_damage": 1,
+ "bonus_damage": 0,
+ "weapon_type": "ONE_HANDED"
+ },
+ {
+ "name": "note",
+ "title": "Note",
+ "descr": "",
+ "short_descr": "",
+ "value": 0,
+ "rent": 0.0,
+ "weight": 0.0,
+ "takeable": true,
+ "location": ""
+ }
+ ],
+ "creatures": []
}
-}
+}
\ No newline at end of file
diff --git a/tale/llm/character.py b/tale/llm/character.py
index 104305a8..2a1ae137 100644
--- a/tale/llm/character.py
+++ b/tale/llm/character.py
@@ -9,7 +9,9 @@
from tale.base import Location
from tale.errors import LlmResponseException
from tale.llm import llm_config
+from tale.llm.contexts.ActionContext import ActionContext
from tale.llm.llm_io import IoUtil
+from tale.llm.contexts.DialogueContext import DialogueContext
from tale.load_character import CharacterV2
@@ -19,25 +21,21 @@ def __init__(self, backend: str, io_util: IoUtil, default_body: dict):
self.pre_prompt = llm_config.params['PRE_PROMPT']
self.dialogue_prompt = llm_config.params['DIALOGUE_PROMPT']
self.character_prompt = llm_config.params['CREATE_CHARACTER_PROMPT']
- self.item_prompt = llm_config.params['ITEM_PROMPT']
self.backend = backend
self.io_util = io_util
self.default_body = default_body
- self.analysis_body = json.loads(llm_config.params['ANALYSIS_BODY'])
self.travel_prompt = llm_config.params['TRAVEL_PROMPT']
self.reaction_prompt = llm_config.params['REACTION_PROMPT']
self.idle_action_prompt = llm_config.params['IDLE_ACTION_PROMPT']
self.free_form_action_prompt = llm_config.params['ACTION_PROMPT']
self.json_grammar = llm_config.params['JSON_GRAMMAR']
+ self.dialogue_template = llm_config.params['DIALOGUE_TEMPLATE']
+ self.action_template = llm_config.params['ACTION_TEMPLATE']
- def generate_dialogue(self, conversation: str,
- character_card: str,
- character_name: str,
- target: str,
- target_description: str='',
+ def generate_dialogue(self,
+ context: DialogueContext,
+ conversation: str,
sentiment = '',
- location_description = '',
- story_context = '',
event_history = '',
short_len : bool=False):
prompt = self.pre_prompt
@@ -45,13 +43,11 @@ def generate_dialogue(self, conversation: str,
#formatted_conversation = llm_config.params['USER_START']
formatted_conversation = conversation.replace('<break>', '\n')#llm_config.params['USER_END'] + '\n' + llm_config.params['USER_START'])
prompt += self.dialogue_prompt.format(
- story_context=story_context,
- location=location_description,
+ context=context.to_prompt_string(),
previous_conversation=formatted_conversation,
- character2_description=character_card,
- character2=character_name,
- character1=target,
- character1_description=target_description,
+ character2=context.speaker_name,
+ character1=context.target_name,
+ dialogue_template=self.dialogue_template,
history=event_history,
sentiment=sentiment)
request_body = deepcopy(self.default_body)
@@ -150,29 +146,12 @@ def perform_reaction(self, action: str, character_name: str, acting_character_na
text = self.io_util.synchronous_request(request_body, prompt=prompt)
return parse_utils.trim_response(text) + "\n"
- def free_form_action(self, story_context: str, story_type: str, location: Location, character_name: str, character_card: str = '', event_history: str = ''):
- actions = ', '.join(['move, say, attack, wear, remove, wield, take, eat, drink, emote'])
- characters = {}
- for living in location.livings:
- if living.visible and living.name != character_name.lower():
- if living.alive:
- characters[living.name] = living.short_description
- else:
- characters[living.name] = f"{living.short_description} (dead)"
- exits = location.exits.keys()
- items = [item.name for item in location.items if item.visible]
+ def free_form_action(self, action_context: ActionContext):
prompt = self.pre_prompt
prompt += self.free_form_action_prompt.format(
- story_context=story_context,
- story_type=story_type,
- actions=actions,
- location=location.name,
- exits=exits,
- location_items=items,
- characters=json.dumps(characters),
- history=event_history,
- character_name=character_name,
- character=character_card)
+ context=action_context.to_prompt_string(),
+ character_name=action_context.character_name,
+ action_template=self.action_template)
request_body = deepcopy(self.default_body)
request_body['grammar'] = self.json_grammar
try :
diff --git a/tale/llm/llm_io.py b/tale/llm/llm_io.py
index 16074e4c..957e0d92 100644
--- a/tale/llm/llm_io.py
+++ b/tale/llm/llm_io.py
@@ -1,3 +1,4 @@
+import re
import requests
import time
import aiohttp
@@ -51,13 +52,13 @@ def asynchronous_request(self, request_body: dict, prompt: str) -> str:
return self.synchronous_request(request_body, prompt)
return self.stream_request(request_body, wait=True, prompt=prompt)
- def stream_request(self, request_body: dict, prompt: str, player_io: TextBuffer = None, io = None, wait: bool = False) -> str:
+ def stream_request(self, request_body: dict, prompt: str, io = None, wait: bool = False) -> str:
if self.backend != 'kobold_cpp':
raise NotImplementedError("Currently does not support streaming requests for OpenAI")
self._set_prompt(request_body, prompt)
result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))
if result:
- return self._do_process_result(self.url + self.data_endpoint, player_io, io, wait)
+ return self._do_process_result(self.url + self.data_endpoint, io, wait)
return ''
async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
@@ -70,7 +71,7 @@ async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
# Handle errors
print("Error occurred:", response.status)
- def _do_process_result(self, url, player_io: TextBuffer = None, io = None, wait: bool = False) -> str:
+ def _do_process_result(self, url, io = None, wait: bool = False) -> str:
""" Process the result from the stream endpoint """
tries = 0
old_text = ''
@@ -84,10 +85,9 @@ def _do_process_result(self, url, player_io: TextBuffer = None, io = None, wait:
continue
if not wait:
new_text = text[len(old_text):]
- player_io.print(new_text, end=False, format=True, line_breaks=False)
- io.write_output()
+ io.output_no_newline(new_text, new_paragraph=False)
old_text = text
-
+ io.output_no_newline("")
return old_text
def _parse_kobold_result(self, result: str) -> str:
@@ -108,7 +108,17 @@ def _set_prompt(self, request_body: dict, prompt: str) -> dict:
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
if self.backend == 'kobold_cpp':
+ context = self._extract_context(prompt)
+ request_body['memory'] = context
request_body['prompt'] = prompt
else :
request_body['messages'][1]['content'] = prompt
- return request_body
\ No newline at end of file
+ return request_body
+
+ def _extract_context(self, full_string):
+ pattern = re.escape('<context>') + "(.*?)" + re.escape('</context>')
+ match = re.search(pattern, full_string, re.DOTALL)
+ if match:
+ return '<context>' + match.group(1) + '</context>'
+ else:
+ return ''
\ No newline at end of file
diff --git a/tale/llm/llm_utils.py b/tale/llm/llm_utils.py
index dcfd5cc6..b8dcd18e 100644
--- a/tale/llm/llm_utils.py
+++ b/tale/llm/llm_utils.py
@@ -100,11 +100,9 @@ def evoke(self, player_io: TextBuffer, message: str, short_len : bool=False, rol
text = self.io_util.synchronous_request(request_body, prompt=prompt)
llm_cache.cache_look(text, text_hash_value)
return output_template.format(message=message, text=text), rolling_prompt
- text = self.io_util.stream_request(request_body=request_body, player_io=player_io, prompt=prompt, io=self.connection)
- player_io.print(output_template.format(message=message, text=text), end=False, format=True, line_breaks=False)
+ text = self.io_util.stream_request(request_body=request_body, player_io=player_io, prompt=prompt, io=self.connection)
llm_cache.cache_look(text, text_hash_value)
-
return '\n', rolling_prompt
def generate_dialogue(self, conversation: str,
diff --git a/tale/player.py b/tale/player.py
index 30f27e73..43b842e6 100644
--- a/tale/player.py
+++ b/tale/player.py
@@ -339,9 +339,9 @@ def output(self, *lines: str) -> None:
"""directly writes the given text to the player's screen, without buffering and formatting/wrapping"""
self.io.output(*lines)
- def output_no_newline(self, line: str) -> None:
+ def output_no_newline(self, line: str, new_paragraph = True) -> None:
"""similar to output() but writes a single line, without newline at the end"""
- self.io.output_no_newline(self.io.smartquotes(line))
+ self.io.output_no_newline(self.io.smartquotes(line), new_paragraph)
def input_direct(self, prompt: str) -> str:
"""
diff --git a/tale/tio/console_io.py b/tale/tio/console_io.py
index b333f7ff..6f210537 100644
--- a/tale/tio/console_io.py
+++ b/tale/tio/console_io.py
@@ -177,12 +177,12 @@ def output(self, *lines: str) -> None:
print(self._apply_style(line, self.do_styles))
sys.stdout.flush()
- def output_no_newline(self, text: str) -> None:
+ def output_no_newline(self, text: str, new_paragraph = True) -> None:
"""Like output, but just writes a single line, without end-of-line."""
if prompt_toolkit and self.do_prompt_toolkit:
self.output(text)
else:
- super().output_no_newline(text)
+ super().output_no_newline(text, new_paragraph)
print(self._apply_style(text, self.do_styles), end="")
sys.stdout.flush()
diff --git a/tale/tio/if_browser_io.py b/tale/tio/if_browser_io.py
index 3cfe94c2..207e7c39 100644
--- a/tale/tio/if_browser_io.py
+++ b/tale/tio/if_browser_io.py
@@ -157,12 +157,15 @@ def output(self, *lines: str) -> None:
self.output_no_newline(line)
self.__new_html_available.set()
- def output_no_newline(self, text: str) -> None:
- super().output_no_newline(text)
+ def output_no_newline(self, text: str, new_paragraph = True) -> None:
+ super().output_no_newline(text, new_paragraph)
text = self.convert_to_html(text)
if text == "\n":
text = "<br>"
- self.__html_to_browser.append("<p>" + text + "</p>\n")
+ if new_paragraph:
+ self.__html_to_browser.append("<p>" + text + "</p>\n")
+ else:
+ self.__html_to_browser.append(text.replace("\\n", "<br>"))
self.__new_html_available.set()
def convert_to_html(self, line: str) -> str:
diff --git a/tale/tio/iobase.py b/tale/tio/iobase.py
index f35d31c2..4ab3e178 100644
--- a/tale/tio/iobase.py
+++ b/tale/tio/iobase.py
@@ -99,7 +99,7 @@ def output(self, *lines: str) -> None:
"""
self.last_output_line = lines[-1]
- def output_no_newline(self, text: str) -> None:
+ def output_no_newline(self, text: str, new_paragraph = True) -> None:
"""
Like output, but just writes a single line, without end-of-line.
Implement specific behavior in subclass (but don't forget to call base method)
diff --git a/tale/tio/tkinter_io.py b/tale/tio/tkinter_io.py
index ca1500ce..90600267 100644
--- a/tale/tio/tkinter_io.py
+++ b/tale/tio/tkinter_io.py
@@ -91,9 +91,9 @@ def output(self, *lines: str) -> None:
for line in lines:
self.gui.write_line(line)
- def output_no_newline(self, text: str) -> None:
+ def output_no_newline(self, text: str, new_paragraph = True) -> None:
"""Like output, but just writes a single line, without end-of-line."""
- super().output_no_newline(text)
+ super().output_no_newline(text, new_paragraph)
self.gui.write_line(text)
diff --git a/tests/test_player.py b/tests/test_player.py
index cd0a5530..9d08555f 100644
--- a/tests/test_player.py
+++ b/tests/test_player.py
@@ -582,6 +582,13 @@ def test_strip(self):
output.print(" 1 ", format=False)
self.assertEqual([(" 1 \n", False)], output.get_paragraphs())
+ def test_no_line_break(self):
+ output = TextBuffer()
+ output.print("1", line_breaks=False)
+ output.print("2", line_breaks=False)
+ output.print("3", line_breaks=False)
+ self.assertEqual([("123\n", True)], output.get_paragraphs())
+
class TestCharacterBuilders(unittest.TestCase):
def setUp(self):
From 540d2507b30902955efb41f74b60eb4dc9565d43 Mon Sep 17 00:00:00 2001
From: rickard
Date: Sun, 7 Jan 2024 16:30:42 +0100
Subject: [PATCH 3/5] some tidying up for streaming
---
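Notes: evoke() no longer takes a player_io buffer; the message prefix is
written once through self.connection before streaming starts, and backend
parse failures now raise LlmResponseException internally and surface as an
empty string from synchronous_request(). Callers shrink accordingly; the new
call shape as used in tale/player.py (arguments illustrative):

    msg, rolling_prompt = mud_context.driver.llm_util.evoke(
        message,
        short_len=short_len,
        rolling_prompt=self.rolling_prompt,
        alt_prompt=alt_prompt)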
tale/llm/llm_io.py | 17 ++++++++++-------
tale/llm/llm_utils.py | 5 +++--
tale/player.py | 3 +--
tests/supportstuff.py | 1 +
tests/test_llm_utils.py | 3 ++-
tests/test_player.py | 7 -------
6 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/tale/llm/llm_io.py b/tale/llm/llm_io.py
index 957e0d92..f54ad4d1 100644
--- a/tale/llm/llm_io.py
+++ b/tale/llm/llm_io.py
@@ -4,6 +4,7 @@
import aiohttp
import asyncio
import json
+from tale.errors import LlmResponseException
import tale.parse_utils as parse_utils
from tale.player_utils import TextBuffer
@@ -41,10 +42,13 @@ def synchronous_request(self, request_body: dict, prompt: str) -> str:
request_body['response_format'] = self.openai_json_format
self._set_prompt(request_body, prompt)
response = requests.post(self.url + self.endpoint, headers=self.headers, data=json.dumps(request_body))
- if self.backend == 'kobold_cpp':
- parsed_response = self._parse_kobold_result(response.text)
- else:
- parsed_response = self._parse_openai_result(response.text)
+ try:
+ if self.backend == 'kobold_cpp':
+ parsed_response = self._parse_kobold_result(response.text)
+ else:
+ parsed_response = self._parse_openai_result(response.text)
+ except LlmResponseException as exc:
+ return ''
return parsed_response
def asynchronous_request(self, request_body: dict, prompt: str) -> str:
@@ -87,7 +91,7 @@ def _do_process_result(self, url, io = None, wait: bool = False) -> str:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
- io.output_no_newline("")
+ io.output_no_newline("<br>", new_paragraph=False)
return old_text
def _parse_kobold_result(self, result: str) -> str:
@@ -99,8 +103,7 @@ def _parse_openai_result(self, result: str) -> str:
try:
return json.loads(result)['choices'][0]['message']['content']
except:
- print("Error parsing result from OpenAI")
- print(result)
+ raise LlmResponseException("Error parsing result from backend")
def _set_prompt(self, request_body: dict, prompt: str) -> dict:
if self.user_start_prompt:
diff --git a/tale/llm/llm_utils.py b/tale/llm/llm_utils.py
index b8dcd18e..e0554bc5 100644
--- a/tale/llm/llm_utils.py
+++ b/tale/llm/llm_utils.py
@@ -69,7 +69,7 @@ def __init__(self, io_util: IoUtil = None):
io_util=self.io_util,
backend=self.backend)
- def evoke(self, player_io: TextBuffer, message: str, short_len : bool=False, rolling_prompt='', alt_prompt='', skip_history=True):
+ def evoke(self, message: str, short_len : bool=False, rolling_prompt='', alt_prompt='', skip_history=True):
"""Evoke a response from LLM. Async if stream is True, otherwise synchronous.
Update the rolling prompt with the latest message.
Will put generated text in lm_cache.look_hashes, and reuse it if same hash is generated."""
@@ -101,7 +101,8 @@ def evoke(self, player_io: TextBuffer, message: str, short_len : bool=False, rol
llm_cache.cache_look(text, text_hash_value)
return output_template.format(message=message, text=text), rolling_prompt
- text = self.io_util.stream_request(request_body=request_body, player_io=player_io, prompt=prompt, io=self.connection)
+ self.connection.output(output_template.format(message=message, text=''))
+ text = self.io_util.stream_request(request_body=request_body, prompt=prompt, io=self.connection)
llm_cache.cache_look(text, text_hash_value)
return '\n', rolling_prompt
diff --git a/tale/player.py b/tale/player.py
index 43b842e6..c42a50f9 100644
--- a/tale/player.py
+++ b/tale/player.py
@@ -78,8 +78,7 @@ def tell(self, message: str, *, end: bool=False, format: bool=True, evoke: bool=
if evoke:
if self.title in message:
message = message.replace(self.title, 'you')
- msg, rolling_prompt = mud_context.driver.llm_util.evoke(self._output,
- message,
+ msg, rolling_prompt = mud_context.driver.llm_util.evoke(message,
short_len = short_len,
rolling_prompt = self.rolling_prompt,
alt_prompt = alt_prompt)
diff --git a/tests/supportstuff.py b/tests/supportstuff.py
index fcba4775..a4019885 100644
--- a/tests/supportstuff.py
+++ b/tests/supportstuff.py
@@ -65,6 +65,7 @@ class FakeIoUtil(IoUtil):
def __init__(self, response: list = []) -> None:
super().__init__()
self.response = response # type: list
+ self.backend = 'kobold_cpp'
def synchronous_request(self, request_body: dict, prompt: str = None) -> str:
return self.response.pop(0) if isinstance(self.response, list) > 0 and len(self.response) > 0 else self.response
diff --git a/tests/test_llm_utils.py b/tests/test_llm_utils.py
index ff58a44e..65945676 100644
--- a/tests/test_llm_utils.py
+++ b/tests/test_llm_utils.py
@@ -36,8 +36,9 @@ def test_read_items(self):
def test_evoke(self):
evoke_string = 'test response'
self.llm_util.io_util = FakeIoUtil(response=evoke_string)
+
self.llm_util.set_story(self.story)
- result = self.llm_util.evoke(message='test evoke', player_io=None)
+ result = self.llm_util.evoke(message='test evoke')
assert(result)
assert(llm_cache.get_looks([llm_cache.generate_hash('test evoke')]) == evoke_string)
diff --git a/tests/test_player.py b/tests/test_player.py
index 9d08555f..cd0a5530 100644
--- a/tests/test_player.py
+++ b/tests/test_player.py
@@ -582,13 +582,6 @@ def test_strip(self):
output.print(" 1 ", format=False)
self.assertEqual([(" 1 \n", False)], output.get_paragraphs())
- def test_no_line_break(self):
- output = TextBuffer()
- output.print("1", line_breaks=False)
- output.print("2", line_breaks=False)
- output.print("3", line_breaks=False)
- self.assertEqual([("123\n", True)], output.get_paragraphs())
-
class TestCharacterBuilders(unittest.TestCase):
def setUp(self):
From 26c995e26b8763b77c5b20940695f554eb21dac3 Mon Sep 17 00:00:00 2001
From: rickard
Date: Sun, 7 Jan 2024 18:54:31 +0100
Subject: [PATCH 4/5] some more wrangling of streamed data
---
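Notes: NPC lines can carry an avatar marker ('name <:> text', added by
pad_text_for_avatar()); the new unpad_text() strips that marker before events
and conversations are cached, so it does not leak into the LLM history. The
round trip, using the two helpers from tale/resources_utils.py:

    padded = pad_text_for_avatar('Hello there', 'duchess')
    # 'duchess <:> Hello there'
    unpad_text(padded)            # 'Hello there'
    unpad_text('no marker here')  # returned unchanged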
backend_kobold_cpp.yaml | 2 +-
tale/llm/LivingNpc.py | 12 ++++++------
tale/llm/llm_io.py | 3 ++-
tale/llm/llm_utils.py | 4 ++--
tale/resources_utils.py | 6 ++++++
5 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/backend_kobold_cpp.yaml b/backend_kobold_cpp.yaml
index e94ab928..b4c4d804 100644
--- a/backend_kobold_cpp.yaml
+++ b/backend_kobold_cpp.yaml
@@ -1,6 +1,6 @@
URL: "http://localhost:5001"
ENDPOINT: "/api/v1/generate"
-STREAM: True
+STREAM: False
STREAM_ENDPOINT: "/api/extra/generate/stream"
DATA_ENDPOINT: "/api/extra/generate/check"
DEFAULT_BODY: '{"stop_sequence": "\n\n\n\n", "max_length":750, "max_context_length":4096, "temperature":0.5, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'
diff --git a/tale/llm/LivingNpc.py b/tale/llm/LivingNpc.py
index 47db9846..29a9b4b6 100644
--- a/tale/llm/LivingNpc.py
+++ b/tale/llm/LivingNpc.py
@@ -9,7 +9,7 @@
from typing import Sequence
from tale.quest import Quest
-from tale.resources_utils import pad_text_for_avatar
+from tale.resources_utils import pad_text_for_avatar, unpad_text
class LivingNpc(Living):
@@ -34,7 +34,7 @@ def __init__(self, name: str, gender: str, *,
def notify_action(self, parsed: ParseResult, actor: Living) -> None:
# store even our own events.
- event_hash = llm_cache.cache_event(parsed.unparsed)
+ event_hash = llm_cache.cache_event(unpad_text(parsed.unparsed))
self._observed_events.append(event_hash)
if actor is self or parsed.verb in self.verbs:
return # avoid reacting to ourselves, or reacting to verbs we already have a handler for
@@ -80,7 +80,7 @@ def notify_action(self, parsed: ParseResult, actor: Living) -> None:
self._clear_quest()
def do_say(self, what_happened: str, actor: Living) -> None:
- tell_hash = llm_cache.cache_tell('{actor.title} says {what_happened}'.format(actor=actor, what_happened=what_happened))
+ tell_hash = llm_cache.cache_tell('{actor.title} says {what_happened}'.format(actor=actor, what_happened=unpad_text(what_happened)))
self._conversations.append(tell_hash)
short_len = False if isinstance(actor, Player) else True
item = None
@@ -108,7 +108,7 @@ def do_say(self, what_happened: str, actor: Living) -> None:
if not response:
raise LlmResponseException("Failed to parse dialogue")
- tell_hash = llm_cache.cache_tell('{actor.title} says: {response}'.format(actor=self, response=response))
+ tell_hash = llm_cache.cache_tell('{actor.title} says: {response}'.format(actor=self, response=unpad_text(response)))
self._conversations.append(tell_hash)
self._defer_result(response, verb='say')
if item:
@@ -213,7 +213,7 @@ def autonomous_action(self) -> str:
defered_actions = []
if action.get('text', ''):
text = action['text']
- tell_hash = llm_cache.cache_tell('{actor.title} says: "{response}"'.format(actor=self, response=text))
+ tell_hash = llm_cache.cache_tell('{actor.title} says: "{response}"'.format(actor=self, response=unpad_text(text)))
self._conversations.append(tell_hash)
#if mud_context.config.custom_resources:
if action.get('target'):
@@ -260,7 +260,7 @@ def tell_action_deferred(self):
actions = '\n'.join(self.deferred_actions)
deferred_action = ParseResult(verb='idle-action', unparsed=actions, who_info=None)
self.tell_others(actions + '\n')
- self.location._notify_action_all(deferred_action, actor=self)
+ #self.location._notify_action_all(deferred_action, actor=self)
self.deferred_actions.clear()
def _clear_quest(self):
diff --git a/tale/llm/llm_io.py b/tale/llm/llm_io.py
index f54ad4d1..33f6c31e 100644
--- a/tale/llm/llm_io.py
+++ b/tale/llm/llm_io.py
@@ -48,6 +48,7 @@ def synchronous_request(self, request_body: dict, prompt: str) -> str:
else:
parsed_response = self._parse_openai_result(response.text)
except LlmResponseException as exc:
+ print("Error parsing response from backend - ", exc)
return ''
return parsed_response
@@ -91,7 +92,7 @@ def _do_process_result(self, url, io = None, wait: bool = False) -> str:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
- io.output_no_newline("<br>", new_paragraph=False)
+ #io.output_no_newline("<br>", new_paragraph=False)
return old_text
def _parse_kobold_result(self, result: str) -> str:
diff --git a/tale/llm/llm_utils.py b/tale/llm/llm_utils.py
index e0554bc5..807a1ef7 100644
--- a/tale/llm/llm_utils.py
+++ b/tale/llm/llm_utils.py
@@ -100,8 +100,8 @@ def evoke(self, message: str, short_len : bool=False, rolling_prompt='', alt_pro
text = self.io_util.synchronous_request(request_body, prompt=prompt)
llm_cache.cache_look(text, text_hash_value)
return output_template.format(message=message, text=text), rolling_prompt
-
- self.connection.output(output_template.format(message=message, text=''))
+ if self.connection:
+ self.connection.output(output_template.format(message=message, text=''))
text = self.io_util.stream_request(request_body=request_body, prompt=prompt, io=self.connection)
llm_cache.cache_look(text, text_hash_value)
return '\n', rolling_prompt
diff --git a/tale/resources_utils.py b/tale/resources_utils.py
index 6ccf0119..17179817 100644
--- a/tale/resources_utils.py
+++ b/tale/resources_utils.py
@@ -5,6 +5,12 @@ def pad_text_for_avatar(text: str, npc_name: str) -> str:
"""Pad text for NPC output."""
return npc_name + ' <:> ' + text if npc_name else text
+def unpad_text(text: str) -> str:
+ """Unpad text for NPC output."""
+ if '<:>' not in text:
+ return text
+ return text.split('<:>')[-1].strip()
+
def check_file_exists_in_resources(file_name) -> str:
file_path = os.path.join(os.path.dirname('../../tale/web/resources/'), file_name + '.jpg')
if os.path.exists(file_path):
From 3f4a1929517469f3b71b3307d4e7716aef7d405a Mon Sep 17 00:00:00 2001
From: rickard
Date: Sun, 7 Jan 2024 18:57:35 +0100
Subject: [PATCH 5/5] reverting using kobold 'memory' until further tested
---
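Notes: backs out the part of patch 2 that copied the <context> block into
kobold_cpp's 'memory' field via _extract_context(); until that has been tested
further, the whole prompt is sent through request_body['prompt'] as before.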
tale/llm/llm_io.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/tale/llm/llm_io.py b/tale/llm/llm_io.py
index 33f6c31e..81abcb3d 100644
--- a/tale/llm/llm_io.py
+++ b/tale/llm/llm_io.py
@@ -92,7 +92,6 @@ def _do_process_result(self, url, io = None, wait: bool = False) -> str:
new_text = text[len(old_text):]
io.output_no_newline(new_text, new_paragraph=False)
old_text = text
- #io.output_no_newline("<br>", new_paragraph=False)
return old_text
def _parse_kobold_result(self, result: str) -> str:
@@ -112,8 +111,6 @@ def _set_prompt(self, request_body: dict, prompt: str) -> dict:
if self.user_end_prompt:
prompt = prompt + self.user_end_prompt
if self.backend == 'kobold_cpp':
- context = self._extract_context(prompt)
- request_body['memory'] = context
request_body['prompt'] = prompt
else :
request_body['messages'][1]['content'] = prompt