From 1bc6c05cf0b0021f9afba9a5bd1f9096592eee5f Mon Sep 17 00:00:00 2001
From: Alexey Rybalchenko
Date: Sun, 13 Oct 2024 11:09:32 +0200
Subject: [PATCH] add model context to metrics output

---
 src/model.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/model.py b/src/model.py
index 4be8f1f..8834374 100644
--- a/src/model.py
+++ b/src/model.py
@@ -13,6 +13,9 @@ def post_request_generate(model, prompt):
     model_family = model_details.get("details", {}).get("family", "N/A")
     model_parameter_size = model_details.get("details", {}).get("parameter_size", "N/A")
     model_quantization_level = model_details.get("details", {}).get("quantization_level", "N/A")
+    context_length = model_details.get("model_info", {}).get("llama.context_length", "N/A")
+
+    # print(json.dumps(model_details, indent=3))
 
     response_content = ""
     with requests.post(url, headers=headers, json=data, stream=True) as r:
@@ -26,16 +29,19 @@ def post_request_generate(model, prompt):
                 else:
                     # This is the final response with metrics
                     print("\n\n---------------------")
-                    print(f"Model: {model} (Family: {model_family}), Format: {model_format}, Parameter Size: {model_parameter_size}, Quantization Level: {model_quantization_level}")
+                    print(f"Model: {model}")
+                    print(f"  Family: {model_family}, Format: {model_format}")
+                    print(f"  Parameter Size: {model_parameter_size}, Quantization: {model_quantization_level}")
+                    print(f"  Context Length: {context_length}")
                     eval_count = json_response.get("eval_count", 0)
                     prompt_eval_count = json_response.get('prompt_eval_count', 0)
                     eval_duration = json_response.get("eval_duration", 1)  # in nanoseconds
                     tokens_per_second = (eval_count / eval_duration) * 1e9
-                    print(f"Tokens generated: {eval_count}")
-                    print(f"Generation time: {eval_duration / 1e9:.2f} seconds")
-                    print(f"Speed: {tokens_per_second:.2f} tokens/second")
                     print(f"Prompt tokens: {prompt_eval_count}")
+                    print(f"Tokens generated: {eval_count}")
                     print(f"Total tokens: {prompt_eval_count + eval_count}")
+                    print(f"Speed: {tokens_per_second:.2f} tokens/second")
+                    print(f"Generation time: {eval_duration / 1e9:.2f} seconds")
                     print(f"Total duration: {json_response.get('total_duration', 0) / 1e9:.2f} seconds")
                     print("---------------------")
                     print()
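
Note on the new context_length lookup: it assumes model_details comes from Ollama's
/api/show endpoint, which reports "llama.context_length" inside its "model_info" object.
A minimal sketch of how that dict could be fetched; the fetch_model_details helper name,
the example model name, and the http://localhost:11434 base URL are assumptions, not
part of this patch:

    import requests

    def fetch_model_details(model, base_url="http://localhost:11434"):
        # Assumed helper (not in this patch): POST /api/show returns "details"
        # (family, format, parameter_size, quantization_level) and "model_info"
        # (e.g. "llama.context_length") for the given model. Current Ollama
        # versions accept {"model": ...} in the request body.
        resp = requests.post(f"{base_url}/api/show", json={"model": model})
        resp.raise_for_status()
        return resp.json()

    # Example usage with a placeholder model name:
    details = fetch_model_details("llama3.1")
    print(details.get("model_info", {}).get("llama.context_length", "N/A"))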