
Commit

remove
Deegue committed Jan 9, 2024
1 parent cadea69 commit a81fbb7
Showing 2 changed files with 1 addition and 97 deletions.
88 changes: 0 additions & 88 deletions examples/inference/api_server_simple/query_batch.py

This file was deleted.

10 changes: 1 addition & 9 deletions examples/inference/api_server_simple/query_single.py
@@ -18,7 +18,7 @@
 import time
 import argparse
 
-parser = argparse.ArgumentParser(description="Model Inference Script", add_help=True)
+parser = argparse.ArgumentParser(description="Example script to query with single request", add_help=True)
 parser.add_argument("--model_endpoint", default="http://127.0.0.1:8000", type=str, help="Deployed model endpoint.")
 parser.add_argument("--streaming_response", default=False, action="store_true", help="Whether to enable streaming response.")
 parser.add_argument("--max_new_tokens", default=None, help="The maximum numbers of tokens to generate.")
@@ -41,12 +41,9 @@
 
 sample_input = {"text": prompt, "config": config, "stream": args.streaming_response}
 
-total_time = 0.0
 num_iter = args.num_iter
-num_warmup = 3
 for i in range(num_iter):
     print("iter: ", i)
-    tic = time.time()
     proxies = { "http": None, "https": None}
     outputs = requests.post(args.model_endpoint, proxies=proxies, json=sample_input, stream=args.streaming_response)
     if args.streaming_response:
@@ -56,8 +53,3 @@
         print()
     else:
         print(outputs.text, flush=True)
-    toc = time.time()
-    if i >= num_warmup:
-        total_time += (toc - tic)
-
-print("Inference latency: %.3f ms." % (total_time / (num_iter - num_warmup) * 1000))
