Skip to content

Commit

Permalink
Merge branch 'batch-serving' into parallel-sampling-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Dec 11, 2023
2 parents 8977d48 + a356074 commit 8bbe0b3
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions serve/mlc_serve/engine/engine_common.py
Original file line number Diff line number Diff line change
def should_stop_by_length(state: RequestState, max_context_length: int) -> bool:
    """Decide whether generation for *state* should stop for length reasons.

    Returns True only when every still-active generation sequence has either
    hit the model context window or produced at least ``max_tokens`` tokens.
    Returns False immediately when the request is already finished or has no
    ``max_tokens`` stopping criterion.

    Side effect: a sequence whose prompt + generated length reaches
    ``max_context_length`` is marked finished here.

    TODO: context-window exhaustion and max_tokens are currently collapsed
    into a single "stop" answer; differentiate the two reasons later.
    """
    max_tokens = state.stopping_criteria.max_tokens
    if state.is_finished or max_tokens is None:
        return False

    for seq in state.generation_sequences:
        if seq.is_finished:
            continue

        # Context so far = prompt tokens plus tokens generated for this sequence.
        context_len = state.prompt_len + len(seq.generated_token_ids)

        if context_len >= max_context_length:
            # Out of context window: this sequence can produce nothing more.
            seq.is_finished = True
        elif context_len - state.prompt_len < max_tokens:
            # At least one sequence still has budget left — keep going.
            return False

    return True
Expand Down

0 comments on commit 8bbe0b3

Please sign in to comment.