Skip to content

Commit

Permalink
add missing free_request method to Dummy cache manager
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Dec 11, 2023
1 parent fd39416 commit c8b7f55
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions serve/mlc_serve/model/dummy_model.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,18 @@
from typing import Optional, Union
from typing import Union

from mlc_serve.engine import (
ChatMessage,
DebugOptions,
FinishReason,
Request,
RequestState,
RequestId,
RequestOutput,
SamplingParams,
StoppingCriteria,
get_engine_config
)
from mlc_serve.model.base import ModelArtifactConfig
from mlc_serve.engine.model_module import (
ConversationTemplate,
DecodeRequest,
KVCache,
KVCacheManager,
ModelModule,
PrefillRequest,
SequenceId,
TextGenerationResult,
TextGenerator,
Tokenizer,
)

class DummyTokenizer:
Expand Down Expand Up @@ -74,6 +64,10 @@ def free(self, sequence_id: SequenceId):
raise RuntimeError("Multiple generated sequences not supported")
del self.cache.cached_requests[sequence_id.request_id]

def free_request(self, state: RequestState):
    """Release every cache slot held by *state*.

    Walks the request's generation sequences and frees each one via
    ``self.free`` so the dummy cache drops its per-request bookkeeping.
    """
    release = self.free
    for seq in state.generation_sequences:
        release(seq.seq_id)

def get_kv_cache_size(self) -> int:
    """Return the KV-cache capacity in tokens (``cache.max_cached_tokens``)."""
    capacity = self.cache.max_cached_tokens
    return capacity

Expand Down

0 comments on commit c8b7f55

Please sign in to comment.