Draft: Schedule File GenAI Perf frontend #168

Open
wants to merge 11 commits into base: main
@@ -29,7 +29,7 @@
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
from genai_perf.utils import sample_bounded_normal


@@ -50,6 +50,7 @@ def convert(
"prompt": prompt,
}
self._add_request_params(payload, config)
self._override_extra(payload, row)
request_body["data"].append({"payload": [payload]})

return request_body
@@ -67,3 +68,7 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
)
for key, value in config.extra_inputs.items():
payload[key] = value

def _override_extra(self, payload: Dict, row: DataRow) -> None:
for key, value in row.extra_args.items():
payload[key] = value
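
Note on ordering: because convert() calls _add_request_params before _override_extra, per-row values from DataRow.extra_args win over the global --extra-inputs values. A minimal standalone sketch of that precedence (plain dicts, not the real converter classes):

# Sketch only: per-row extra_args override config-level extra_inputs
payload = {"model": "demo_model", "prompt": "hello"}  # hypothetical payload
config_extra_inputs = {"max_tokens": 256, "temperature": 0.7}  # from --extra-inputs
row_extra_args = {"max_tokens": 64}  # from DataRow.extra_args

for key, value in config_extra_inputs.items():  # what _add_request_params does
    payload[key] = value
for key, value in row_extra_args.items():  # what _override_extra does
    payload[key] = value

assert payload["max_tokens"] == 64  # the schedule's per-request value wins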
@@ -34,7 +34,7 @@
DEFAULT_TENSORRTLLM_MAX_TOKENS,
)
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
from genai_perf.utils import sample_bounded_normal


@@ -62,6 +62,7 @@ def convert(
"request_output_len": [DEFAULT_TENSORRTLLM_MAX_TOKENS],
}
self._add_request_params(payload, config)
self._override_extra(payload, row)
request_body["data"].append(payload)

return request_body
@@ -83,3 +84,9 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:

for key, value in config.extra_inputs.items():
payload[key] = [value]

def _override_extra(self, payload: Dict, row: DataRow) -> None:
for key, value in row.extra_args.items():
if key == "max_tokens":
payload["request_output_len"] = [value]
payload["min_length"] = [value]
@@ -29,7 +29,7 @@
from genai_perf.inputs.converters.base_converter import BaseConverter
from genai_perf.inputs.input_constants import DEFAULT_OUTPUT_TOKENS_MEAN
from genai_perf.inputs.inputs_config import InputsConfig
from genai_perf.inputs.retrievers.generic_dataset import GenericDataset
from genai_perf.inputs.retrievers.generic_dataset import DataRow, GenericDataset
from genai_perf.utils import sample_bounded_normal


@@ -56,6 +56,7 @@ def convert(
"text_input": prompt,
}
self._add_request_params(payload, config)
self._override_extra(payload, row)
request_body["data"].append({"payload": [payload]})

return request_body
@@ -73,3 +74,8 @@ def _add_request_params(self, payload: Dict, config: InputsConfig) -> None:
)
for key, value in config.extra_inputs.items():
payload[key] = value

def _override_extra(self, payload: Dict, row: DataRow) -> None:
for key, value in row.extra_args.items():
if key == "max_tokens":
payload["max_tokens"] = value
6 changes: 6 additions & 0 deletions genai-perf/genai_perf/inputs/inputs_config.py
@@ -79,6 +79,12 @@ class InputsConfig:
# The filename where the input data is available
input_filename: Optional[Path] = Path("")

# The file containing a fixed request schedule (one JSON object per line)
schedule_file: Optional[str] = None

# The number of tokens per cached prompt block when the schedule provides hash_ids
block_size: int = 512

# The filenames used for synthetic data generation
synthetic_input_filenames: Optional[List[str]] = field(default_factory=list)

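The schedule_file format is still undocumented above; judging from the fields read by the synthetic retriever and the wrapper later in this diff, each line is a standalone JSON object. A hypothetical two-entry example (field names come from the code, values are invented):

import json

# Hypothetical schedule file contents (JSON Lines): one request per line.
# timestamp is in milliseconds (the wrapper divides it by 1000), input_length and
# output_length are token counts, and hash_ids name reusable --block-size token blocks.
example_lines = [
    '{"timestamp": 0, "input_length": 1024, "output_length": 128, "hash_ids": [0, 1]}',
    '{"timestamp": 500, "input_length": 640, "output_length": 64, "hash_ids": [0, 2]}',
]
for line in example_lines:
    entry = json.loads(line)  # each line must parse on its own, as in retrieve_data()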
5 changes: 4 additions & 1 deletion genai-perf/genai_perf/inputs/retrievers/generic_dataset.py
@@ -25,7 +25,9 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from dataclasses import dataclass, field
from typing import Dict, List, TypeAlias
from typing import Any, Dict, List

from typing_extensions import TypeAlias

Filename: TypeAlias = str
TypeOfData: TypeAlias = str
@@ -38,6 +40,7 @@
class DataRow:
texts: List[str] = field(default_factory=list)
images: List[str] = field(default_factory=list)
extra_args: Dict[str, Any] = field(default_factory=dict)

def to_dict(self) -> DataRowDict:
"""
@@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import json
from typing import List

from genai_perf.inputs.input_constants import DEFAULT_SYNTHETIC_FILENAME
@@ -51,6 +52,13 @@ def retrieve_data(self) -> GenericDataset:
files = self.config.synthetic_input_filenames or [DEFAULT_SYNTHETIC_FILENAME]
synthetic_dataset = GenericDataset(files_data={})

prompt_desc = []
if self.config.schedule_file is not None:
with open(self.config.schedule_file, "r") as f:
for j, line in enumerate(f):
if j == self.config.num_dataset_entries:
break
prompt_desc.append(json.loads(line))
use_prefix_prompts = self.config.num_prefix_prompts > 0
if use_prefix_prompts:
SyntheticPromptGenerator.create_prefix_prompts_pool(
@@ -62,14 +70,29 @@ def retrieve_data(self) -> GenericDataset:
for file in files:
data_rows: List[DataRow] = []

for _ in range(self.config.num_dataset_entries):
for i in range(self.config.num_dataset_entries):
row = DataRow(texts=[], images=[])

for _ in range(self.config.batch_size_text):
prompt = SyntheticPromptGenerator.create_synthetic_prompt(
self.config.tokenizer,
self.config.prompt_tokens_mean,
self.config.prompt_tokens_stddev,
)
if prompt_desc:
prompt = SyntheticPromptGenerator.create_synthetic_prompt(
self.config.tokenizer,
prompt_desc[i]["input_length"],
0,
prompt_desc[i].get("hash_ids", None),
self.config.block_size,
)
# TODO: handle additional per-request overrides generically here
output_length = prompt_desc[i].get("output_length")
if output_length is not None:
    row.extra_args["max_tokens"] = output_length
# row.extra_args["model"] = prompt_desc[i].get("model", None)
else:
prompt = SyntheticPromptGenerator.create_synthetic_prompt(
self.config.tokenizer,
self.config.prompt_tokens_mean,
self.config.prompt_tokens_stddev,
)
if use_prefix_prompts:
prefix_prompt = (
SyntheticPromptGenerator.get_random_prefix_prompt()
@@ -16,12 +16,13 @@
import pathlib
import random
from concurrent.futures import ThreadPoolExecutor
from typing import List
from typing import Dict, List, Optional

from genai_perf.tokenizer import Tokenizer


class SyntheticPromptGenerator:
cache: Dict[int, str] = {}
_tokenized_corpus = None
_corpus_length = 0
_prefix_prompts: List[str] = []
@@ -32,6 +33,8 @@ def create_synthetic_prompt(
tokenizer: Tokenizer,
prompt_tokens_mean: int = 550,
prompt_tokens_stddev: int = 250,
prompt_hash_list: Optional[List[int]] = None,
block_size: Optional[int] = None,
) -> str:
"""
Generate a synthetic prompt with a specific number of tokens.
@@ -47,11 +50,28 @@
if cls._tokenized_corpus is None:
cls._initialize_corpus(tokenizer)

num_prompt_tokens = max(
1, int(random.gauss(prompt_tokens_mean, prompt_tokens_stddev))
)
if prompt_hash_list is not None:
assert block_size, "Need block size to continue"
final_prompt = []
size_to_use = block_size
for j, hash_index in enumerate(prompt_hash_list):
if j == len(prompt_hash_list) - 1:
size_to_use = prompt_tokens_mean - (j * block_size)
if hash_index not in cls.cache:
prompt = cls._generate_prompt(tokenizer, size_to_use)
cls.cache[hash_index] = prompt

return cls._generate_prompt(tokenizer, num_prompt_tokens)
final_prompt.append(cls.cache[hash_index])
prompt = " ".join(final_prompt)

else:
num_prompt_tokens = max(
1, int(random.gauss(prompt_tokens_mean, prompt_tokens_stddev))
)

prompt = cls._generate_prompt(tokenizer, num_prompt_tokens)

return prompt

@classmethod
def _initialize_corpus(cls, tokenizer: Tokenizer):
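To make the hash_ids/block_size behavior above concrete: every hash id maps to a cached block of block_size tokens, the last block is sized so the total matches the requested input length, and repeating a hash id across requests reuses the identical cached text (a shared prefix). Note the cache is keyed only by hash id, so a reused id always returns whatever block was generated for it first. A worked sizing example under assumed inputs:

# Assumed inputs: block_size=512, hash_ids=[0, 1, 2], input_length=1200
block_size = 512
hash_ids = [0, 1, 2]
input_length = 1200  # passed to create_synthetic_prompt as prompt_tokens_mean

sizes = []
for j, _ in enumerate(hash_ids):
    if j == len(hash_ids) - 1:
        sizes.append(input_length - j * block_size)  # final block takes the remainder
    else:
        sizes.append(block_size)

assert sizes == [512, 512, 176]  # 1200 = 512 + 512 + 176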
18 changes: 17 additions & 1 deletion genai-perf/genai_perf/parser.py
@@ -254,7 +254,7 @@ def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
Check inference load args
"""
# If no concurrency or request rate is set, default to 1
if not args.concurrency and not args.request_rate:
if not args.concurrency and not args.request_rate and not args.schedule_file:
args.concurrency = 1
return args

@@ -579,6 +579,22 @@ def _add_input_args(parser):
"'synthetic:queries,passages'. ",
)

input_group.add_argument(
"--schedule-file",
type=str,
default=None,
required=False,
help="Fixed Schedule TODO",
)

input_group.add_argument(
"--block-size",
type=int,
default=512,
required=False,
help="Fixed Schedule TODO",
)

input_group.add_argument(
"--num-dataset-entries",
"--num-prompts",
2 changes: 1 addition & 1 deletion genai-perf/genai_perf/subcommand/analyze.py
@@ -377,7 +377,7 @@ def _determine_infer_mode_and_load_level(
infer_mode = "request_rate"
load_level = f"{args.request_rate}"
else:
infer_mode = "concurrency"
infer_mode = "request_rate"
load_level = "1"
else:
raise GenAIPerfException("Cannot determine infer_mode/load_level")
2 changes: 2 additions & 0 deletions genai-perf/genai_perf/subcommand/common.py
@@ -175,6 +175,8 @@ def create_config_options(args: Namespace) -> InputsConfig:
image_format=args.image_format,
random_seed=args.random_seed,
num_dataset_entries=args.num_dataset_entries,
schedule_file=args.schedule_file,
block_size=args.block_size,
add_stream=args.streaming,
tokenizer=get_tokenizer(
args.tokenizer, args.tokenizer_trust_remote_code, args.tokenizer_revision
3 changes: 3 additions & 0 deletions genai-perf/genai_perf/subcommand/profile.py
@@ -79,6 +79,9 @@ def _report_output(
elif args.request_rate:
infer_mode = "request_rate"
load_level = f"{args.request_rate}"
elif args.schedule_file:
infer_mode = "request_rate"
load_level = "1.0"
else:
raise GenAIPerfException("No valid infer mode specified")

16 changes: 16 additions & 0 deletions genai-perf/genai_perf/wrapper.py
@@ -24,6 +24,9 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import os

Check notice (Code scanning / CodeQL): Unused import. Import of 'os' is not used.
import subprocess

Check notice (Code scanning / CodeQL): Unused import. Import of 'subprocess' is not used.
from argparse import Namespace
from typing import List, Optional

@@ -60,6 +63,17 @@
cmd += ["--concurrency-range", f"{args.concurrency}"]
elif args.request_rate:
cmd += ["--request-rate-range", f"{args.request_rate}"]

if args.schedule_file is not None:
# assert args.request_rate, "Must use request rate with fixed schedule"
timings = []
with open(args.schedule_file, "r") as f:
for j, line in enumerate(f):
if j == args.num_dataset_entries:
break
timings.append(float(json.loads(line)["timestamp"]) / 1000)
cmd += ["--request-rate-range", "1"]
cmd += ["--schedule", ",".join(map(str, timings))]
Contributor Author: Not sure what the expected format for the schedule is after recent changes.

return cmd

@staticmethod
@@ -69,6 +83,7 @@
"backend",
"batch_size_image",
"batch_size_text",
"block_size",
"concurrency",
"endpoint_type",
"extra_inputs",
@@ -95,6 +110,7 @@
"prompt_source",
"random_seed",
"request_rate",
"schedule_file",
"server_metrics_url",
# The 'streaming' passed in to this script is to determine if the
# LLM response should be streaming. That is different than the
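As a rough illustration of the open question in the comment above, this is what the wrapper currently builds from a two-entry schedule file: each timestamp is converted from milliseconds to seconds and the values are joined into a single comma-separated --schedule argument alongside a fixed --request-rate-range of 1. A self-contained sketch (the exact format perf_analyzer expects for --schedule is still unconfirmed):

import json

lines = [
    '{"timestamp": 0, "input_length": 1024, "output_length": 128}',
    '{"timestamp": 500, "input_length": 640, "output_length": 64}',
]
timings = [float(json.loads(line)["timestamp"]) / 1000 for line in lines]  # ms -> s

cmd = ["--request-rate-range", "1", "--schedule", ",".join(map(str, timings))]
# cmd == ['--request-rate-range', '1', '--schedule', '0.0,0.5']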