From 2a9ab8c48b3c0721453bfa6f03f305d65696c2b0 Mon Sep 17 00:00:00 2001
From: Leo Ribeiro <leordev@gmail.com>
Date: Wed, 20 Dec 2023 11:56:34 -0500
Subject: [PATCH] group computation of chunked responses

---
 main.py               | 31 ++++++++++++++++++-------------
 usage_cost_tracker.py |  7 -------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/main.py b/main.py
index 3f4d1ea..964e535 100644
--- a/main.py
+++ b/main.py
@@ -102,8 +102,8 @@ def openapi_spec():
 def ask_chat_route():
     usage_cost_tracker.check_usage_costs()
 
-    query = request.args.get('query')    
-    
+    query = request.args.get('query')
+
     messages = [{"role": "system", "content": "you are a helpful assistant to find the best names that match what knowledge is being asked for. Return only a list of matching names as requested."},
                {"role": "user", "content": "I will provide you lists of json objects which map a name of a piece of knowledge to a description. You then take a question from the website developer.tbd.website and return a list of names that best the question, 2 to 3 ideally."},
                {"role": "assistant", "content": "Got it."},
@@ -121,11 +121,11 @@ def ask_chat_route():
     response = client.chat.completions.create(model="gpt-4-1106-preview",
     messages=messages)
     usage_cost_tracker.compute_response_costs(response)
-    
+
     response_message = response.choices[0].message
     csv_list = response_message.content
     print("csv_list", csv_list)
-    
+
 
     csv_list = csv_list.split(',')
 
@@ -144,7 +144,7 @@ def ask_chat_route():
         _, code = content.split('-----', 1)
         knowledge += f"{item}:\n\n{code}\n\n"
 
-    
+
 
     messages = [{"role": "system", "content": "You are a helpful assistant that provides code examples and explanations when context is provided. Please don't invent APIs. Code examples should be surrounded with markdown backticks to make presentation easy."},
             {"role": "user", "content": "Please don't hallucinate responses if you don't know what the API is, stick to the content you know. Also remember code examples should be surrounded with markdown backticks to make presentation easy."},
@@ -155,6 +155,8 @@ def ask_chat_route():
 
 
     def stream():
+        response_tokens = 0
+        
         if knowledge == '':
             yield 'data: Sorry, I don\'t know about that topic. Please try again.\n\n'
             return
@@ -165,19 +167,22 @@ def stream():
         for line in completion:
             print(line.choices[0])
             chunk = line.choices[0].delta.content
-            if chunk:   
-                usage_cost_tracker.compute_stream_cost(chunk, "gpt-3.5-turbo-16k")                 
+            if chunk:
+                response_tokens += usage_cost_tracker.count_tokens(chunk)
+
                 if chunk.endswith("\n"):
-                    yield 'data: %s|CR|\n\n' % chunk.rstrip()                    
+                    yield 'data: %s|CR|\n\n' % chunk.rstrip()
                 else:
-                    yield 'data: %s\n\n' % chunk                    
+                    yield 'data: %s\n\n' % chunk
 
-        
-    return flask.Response(stream(), mimetype='text/event-stream')        
+        # Record the cost of the accumulated output tokens once, after the stream has been fully consumed
+        usage_cost_tracker.compute_tokens_cost(response_tokens, "gpt-3.5-turbo-16k", is_output=True)
+
+    return flask.Response(stream(), mimetype='text/event-stream')
 
 
 def get_chat_functions():
-    functions = []    
+    functions = []
     for filename in os.listdir('content'):
         if filename.endswith('.txt'):
             topic = filename[:-4]
@@ -187,7 +192,7 @@ def get_chat_functions():
                     "name": f"{topic}",
                     "description": explanation.strip()
                 })
-    
+
     return functions
 
 def main():
diff --git a/usage_cost_tracker.py b/usage_cost_tracker.py
index d5b5224..2b9567f 100644
--- a/usage_cost_tracker.py
+++ b/usage_cost_tracker.py
@@ -81,13 +81,6 @@ def compute_messages_cost(self, messages, model_name):
         self.compute_tokens_cost(num_tokens, model_name)
 
 
-    def compute_stream_cost(self, chunk, model_name):
-        token_count = self.count_tokens(chunk)
-
-        # Assuming the chunk is the response content
-        self.compute_tokens_cost(token_count, model_name, is_output=True)
-
-
     def compute_tokens_cost(self, tokens, model_name, is_output=False):
         if model_name not in MODELS_COSTS:
             print(">>> WARNING! Model not found in MODELS_COSTS, impossible to compute costs")