From 5204ff5c3feeb96e8a6eea65dfcb78395f90d4d8 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Mon, 27 Jan 2025 13:26:44 +0800
Subject: [PATCH] [Bugfix] Fix Granite 3.0 MoE model loading (#12446)

Signed-off-by: DarkLight1337
---
 vllm/model_executor/models/granitemoe.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py
index 51296ef0cc08e..b518a0a6cbdee 100644
--- a/vllm/model_executor/models/granitemoe.py
+++ b/vllm/model_executor/models/granitemoe.py
@@ -348,6 +348,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.config = config
         self.lora_config = lora_config
+        self.quant_config = quant_config  # Required by MixtralForCausalLM
 
         self.model = GraniteMoeModel(vllm_config=vllm_config,
                                      prefix=maybe_prefix(prefix, "model"))