diff --git a/examples/python/run_llama_batched_vllm.py b/examples/python/run_llama_batched_vllm.py index 5cb7f52ae6..dcb16a878d 100644 --- a/examples/python/run_llama_batched_vllm.py +++ b/examples/python/run_llama_batched_vllm.py @@ -277,7 +277,7 @@ def _prepare_eval_queries( if sliding_window: seq_lens.append(min(num_past_tokens + num_queries, sliding_window)) num_past = min(num_past_tokens, sliding_window) - past_slot_mapping += all_slot_mappings[request_id][num_past:] + past_slot_mapping += all_slot_mappings[request_id][:num_past] slot_mapping += all_slot_mappings[request_id][num_past: num_past + num_queries] else: seq_lens.append(num_past_tokens + num_queries)