diff --git a/examples/demo_phonelm_npu.cpp b/examples/demo_phonelm_npu.cpp
index 6d774230..fc40e30b 100644
--- a/examples/demo_phonelm_npu.cpp
+++ b/examples/demo_phonelm_npu.cpp
@@ -12,6 +12,7 @@ int main(int argc, char **argv) {
     cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/phonelm_vocab.mllm");
     cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/phonelm_merges.txt");
     cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/PhoneLM-1.5B-Instruct-128.mllm");
+    cmdParser.add<string>("decoding", 'd', "specify mllm decoding model path", false, "../models/phonelm-1.5b-droidcall-q4_0_4_4.mllm");
     cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
     cmdParser.add<int>("thread", 't', "num of threads", false, 4);
     cmdParser.parse_check(argc, argv);
@@ -19,6 +20,7 @@ int main(int argc, char **argv) {
     string vocab_path = cmdParser.get<string>("vocab");
     string merge_path = cmdParser.get<string>("merge");
     string model_path = cmdParser.get<string>("model");
+    string decoding_path = cmdParser.get<string>("decoding");
     int tokens_limit = cmdParser.get<int>("limits");
     CPUBackend::cpu_threads = cmdParser.get<int>("thread");
 
@@ -27,7 +29,7 @@ int main(int argc, char **argv) {
     auto model = PhoneLMForCausalLM_NPU(config);
     model.load(model_path);
     auto decoding_model = PhoneLMForCausalLM(config);
-    decoding_model.load("../models/phonelm-1.5b-instruct-q4_0_4_4.mllm");
+    decoding_model.load(decoding_path);
 
     vector<string> in_strs = {
         "Give me a short introduction to large language model.",
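
Usage note: with this change the CPU decoding model path is no longer hardcoded; it can be supplied via the new -d/--decoding flag, falling back to the droidcall default above. A hypothetical invocation (the binary name demo_phonelm_npu and the relative paths are assumptions based on the repository's default option values, not verified against a built tree):

    ./demo_phonelm_npu \
        -m ../models/PhoneLM-1.5B-Instruct-128.mllm \
        -d ../models/phonelm-1.5b-droidcall-q4_0_4_4.mllm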