add model context to metrics output
rbx committed Oct 13, 2024
1 parent 169ba71 commit 1bc6c05
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions src/model.py
@@ -13,6 +13,9 @@ def post_request_generate(model, prompt):
     model_family = model_details.get("details", {}).get("family", "N/A")
     model_parameter_size = model_details.get("details", {}).get("parameter_size", "N/A")
     model_quantization_level = model_details.get("details", {}).get("quantization_level", "N/A")
+    context_length = model_details.get("model_info", {}).get("llama.context_length", "N/A")
+
+    # print(json.dumps(model_details, indent=3))
 
     response_content = ""
     with requests.post(url, headers=headers, json=data, stream=True) as r:
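
For context (not part of the commit): the model_details dictionary read above presumably comes from Ollama's /api/show endpoint, which nests general fields under "details" and architecture-specific keys such as "llama.context_length" under "model_info". A minimal sketch of such a lookup, with the helper name and base URL assumed for illustration:

import requests

def get_model_details(model, base_url="http://localhost:11434"):
    # Assumed usage of Ollama's /api/show: the response JSON carries
    # "details" (family, format, parameter_size, quantization_level)
    # and "model_info" (e.g. "llama.context_length").
    response = requests.post(f"{base_url}/api/show", json={"model": model})
    response.raise_for_status()
    return response.json()
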
@@ -26,16 +29,19 @@ def post_request_generate(model, prompt):
             else:
                 # This is the final response with metrics
                 print("\n\n---------------------")
-                print(f"Model: {model} (Family: {model_family}), Format: {model_format}, Parameter Size: {model_parameter_size}, Quantization Level: {model_quantization_level}")
+                print(f"Model: {model}")
+                print(f"  Family: {model_family}, Format: {model_format}")
+                print(f"  Parameter Size: {model_parameter_size}, Quantization: {model_quantization_level}")
+                print(f"  Context Length: {context_length}")
                 eval_count = json_response.get("eval_count", 0)
+                prompt_eval_count = json_response.get('prompt_eval_count', 0)
                 eval_duration = json_response.get("eval_duration", 1)  # in nanoseconds
                 tokens_per_second = (eval_count / eval_duration) * 1e9
-                print(f"Tokens generated: {eval_count}")
-                print(f"Generation time: {eval_duration / 1e9:.2f} seconds")
-                print(f"Speed: {tokens_per_second:.2f} tokens/second")
+                print(f"Prompt tokens: {prompt_eval_count}")
+                print(f"Tokens generated: {eval_count}")
+                print(f"Total tokens: {prompt_eval_count + eval_count}")
+                print(f"Speed: {tokens_per_second:.2f} tokens/second")
+                print(f"Generation time: {eval_duration / 1e9:.2f} seconds")
                 print(f"Total duration: {json_response.get('total_duration', 0) / 1e9:.2f} seconds")
                 print("---------------------")
                 print()
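
The speed figure in the new output is derived from the final streamed chunk (the one with "done": true), whose eval_duration and total_duration are reported in nanoseconds. A worked example with illustrative numbers (not taken from the commit):

eval_count = 128                  # tokens generated (illustrative value)
eval_duration = 2_500_000_000     # 2.5 s, reported in nanoseconds
tokens_per_second = (eval_count / eval_duration) * 1e9   # -> 51.2 tokens/second
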
