Skip to content

Commit

Permalink
Bump the default max_num_seq to 2048 (#226)
Browse files Browse the repository at this point in the history
  • Loading branch information
yelite authored Mar 7, 2024
1 parent 72adea5 commit ce08442
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion serve/mlc_serve/engine/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class MLCServeEngineConfig:
# TODO(@sunggg): figure out better defaults
use_staging_engine: bool = True
max_num_batched_tokens: int = 4096
- max_num_seq: int = 256
+ max_num_seq: int = 2048
max_num_seq_per_request: Optional[int] = None # default to `max_num_seq / 4`
min_decode_steps: int = 32
max_decode_steps: int = 48
Expand Down
2 changes: 1 addition & 1 deletion serve/mlc_serve/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_default_mlc_serve_argparser(description="", allow_override=False):
parser.add_argument("--use-sync-engine", action="store_true")
parser.add_argument("--num-sequences-to-sample", type=int, default=1)
parser.add_argument("--max-num-batched-tokens", type=int, default=4096)
- parser.add_argument("--max-num-seq", type=int, default=256)
+ parser.add_argument("--max-num-seq", type=int, default=2048)
parser.add_argument("--min-decode-steps", type=int, default=32)
parser.add_argument("--max-decode-steps", type=int, default=56)
parser.add_argument("--gpu-memory-utilization", type=float, default=0.9)
Expand Down

0 comments on commit ce08442

Please sign in to comment.