Commit 2bcf20f
fixes
Signed-off-by: Nick Hill <[email protected]>
njhill committed Feb 14, 2025
1 parent 27676bd commit 2bcf20f
Showing 2 changed files with 11 additions and 10 deletions.
18 changes: 9 additions & 9 deletions vllm/v1/worker/gpu_input_batch.py
@@ -160,9 +160,7 @@ def __init__(
        self.repetition_penalties_reqs: Set[str] = set()

        self.min_tokens: List[int] = [0] * max_num_reqs
        self.stop_token_ids: List[Set[int]] = [
            set() for _ in range(max_num_reqs)
        ]
        self.stop_token_ids: List[Set[int]] = [set()] * max_num_reqs

        # lora related
        self.request_lora_mapping = np.zeros((self.max_num_reqs, ),
@@ -196,18 +194,18 @@ def add_request(
request: "CachedRequestState",
req_index: Optional[int] = None,
) -> None:
req_id = request.req_id

if req_index is None:
req_index = self.num_reqs
assert req_index < self.max_num_reqs

req_id = request.req_id
if req_index == len(self._req_ids):
self._req_ids.append(req_id)
self.req_output_token_ids.append(request.output_token_ids)
else:
self._req_ids[req_index] = req_id
self.req_output_token_ids[req_index] = request.output_token_ids

assert req_index < self.max_num_reqs

self.req_id_to_index[req_id] = req_index

# Copy the prompt token ids and output token ids.
Expand Down Expand Up @@ -325,13 +323,15 @@ def clear(self) -> None:
        self.lora_id_to_request_ids.clear()

    def condense(self, empty_req_indices: List[int]) -> None:
        if self.num_reqs == 0:
        num_reqs = self.num_reqs
        if num_reqs == 0:
            # The batched states are empty.
            self.clear()
            return

        # NOTE(woosuk): This function assumes that the empty_req_indices
        # is sorted in descending order.
        last_req_index = self.num_reqs + len(empty_req_indices) - 1
        last_req_index = num_reqs + len(empty_req_indices) - 1
        while empty_req_indices:
            # Find the largest non-empty index.
            while last_req_index in empty_req_indices:
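
The stop_token_ids change above replaces a per-slot set comprehension with [set()] * max_num_reqs, which fills every slot with a reference to one shared set instead of independent sets. A minimal sketch of the difference (illustration only, not code from this commit; it assumes the surrounding batch code only ever replaces these placeholder sets by assignment and never mutates them in place):

    # [set()] * n aliases a single set across every slot.
    shared = [set()] * 3
    distinct = [set() for _ in range(3)]

    shared[0].add("stop")      # in-place mutation shows up in every slot
    distinct[0].add("stop")    # only the first slot changes

    print(shared)    # [{'stop'}, {'stop'}, {'stop'}]
    print(distinct)  # [{'stop'}, set(), set()]

    shared[0] = {"eos"}        # re-assignment affects only slot 0
    print(shared)    # [{'eos'}, {'stop'}, {'stop'}]
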
3 changes: 2 additions & 1 deletion vllm/v1/worker/gpu_model_runner.py
@@ -345,6 +345,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
            self.input_batch.block_table.append_row(req_index, start_index,
                                                    req_data.new_block_ids)

        batch_changed = len(removed_req_indices) > 0 or len(req_ids_to_add) > 0

        # Add the new or resumed requests to the persistent batch.
        # The smaller empty indices are filled first.
        removed_req_indices = sorted(removed_req_indices, reverse=True)
@@ -362,7 +364,6 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
        if removed_req_indices:
            self.input_batch.condense(removed_req_indices)

        batch_changed = len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
        if batch_changed:
            self.input_batch.refresh_sampling_metadata()

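In gpu_model_runner.py, the batch_changed flag is now derived from removed_req_indices and req_ids_to_add before the persistent batch is modified, and the later computation based on unscheduled_req_ids is dropped. A schematic sketch of the resulting control flow (simplified, hypothetical names; not the actual method body):

    def _update_states_sketch(input_batch, removed_req_indices, req_ids_to_add):
        # Decide up front whether the batch composition will change.
        batch_changed = len(removed_req_indices) > 0 or len(req_ids_to_add) > 0

        # ... fill freed slots with new or resumed requests, then condense ...

        # Rebuild cached sampling metadata only when something changed.
        if batch_changed:
            input_batch.refresh_sampling_metadata()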