diff --git a/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py b/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py index c2a31039d..fbb008005 100644 --- a/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py +++ b/shortfin/python/shortfin_apps/llm/components/kvcache/trie_attention_cache.py @@ -154,11 +154,12 @@ def has_common_prefix(tokens1, tokens2): tokens_per_page = self.cache.tokens_per_page - number_of_pages_to_publish = len(tokens) / tokens_per_page if publish_incomplete_page: - number_of_pages_to_publish = math.ceil(number_of_pages_to_publish) + number_of_pages_to_publish = -( + len(tokens) // -tokens_per_page + ) # ceil division else: - number_of_pages_to_publish = math.floor(number_of_pages_to_publish) + number_of_pages_to_publish = len(tokens) // tokens_per_page # Create token blocks for unpublished pages start_token_index = self.number_of_published_pages * tokens_per_page