diff --git a/benchmarks/inference/mii/run_benchmark_client.py b/benchmarks/inference/mii/run_benchmark_client.py
index 52fc0da50..caf20351e 100644
--- a/benchmarks/inference/mii/run_benchmark_client.py
+++ b/benchmarks/inference/mii/run_benchmark_client.py
@@ -74,8 +74,8 @@ def call_mii(client, input_tokens, max_new_tokens, stream):
 
     def callback(response):
         nonlocal time_last_token
-        # print(f"Received: {response.response} time_last_token={time_last_token}")
-        output_tokens.append(response.response[0])
+        # print(f"Received: {response[0].generated_text} time_last_token={time_last_token}")
+        output_tokens.append(response[0].generated_text)
         time_now = time.time()
         token_gen_time.append(time_now - time_last_token)
         time_last_token = time_now
@@ -90,7 +90,7 @@ def callback(response):
     else:
         result = client.generate(
             input_tokens, max_new_tokens=max_new_tokens)
-        output_tokens = result.response[0]
+        output_tokens = result[0].generated_text
 
     return ResponseDetails(
         generated_tokens=output_tokens,