From 332f205f301a5f6a5a54588c623379f5e4fd35e6 Mon Sep 17 00:00:00 2001 From: Example User Date: Mon, 3 Mar 2025 08:42:27 +0000 Subject: [PATCH 1/6] add model SeaLLMs-v3-7B-Chat --- xinference/core/model.py | 1 + xinference/model/llm/llm_family.json | 51 +++++++++++++++++ .../model/llm/llm_family_modelscope.json | 55 +++++++++++++++++++ 3 files changed, 107 insertions(+) diff --git a/xinference/core/model.py b/xinference/core/model.py index 5a78b18fb3..aba3fd6bd3 100644 --- a/xinference/core/model.py +++ b/xinference/core/model.py @@ -76,6 +76,7 @@ class _OutOfMemoryError(Exception): "cogvlm2", "glm-4v", "MiniCPM-V-2.6", + "MiniCPM-o-2_6", ] XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"] diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 3aebdd17e1..6d6a6f0d0d 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -10307,5 +10307,56 @@ "<|im_end|>", "<|endoftext|>" ] + }, + { + "version": 1, + "context_length": 32768, + "model_name": "SeaLLMs-v3-7B-Chat", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat" + ], + "model_description": "We introduce SeaLLMs-v3, the latest series of the SeaLLMs (Large Language Models for Southeast Asian languages) family. ", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "SeaLLMs/SeaLLMs-v3-7B-Chat" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "Q2_K", + "Q3_K_L", + "Q3_K_M", + "Q3_K_S", + "Q4_K_M", + "Q4_K_S", + "Q5_0", + "Q5_1", + "Q5_K_S" + ], + "model_id": "tensorblock/SeaLLMs-v3-7B-Chat-GGUF", + "model_file_name_template": "SeaLLMs-v3-7B-Chat.{quantization}.gguf" + } + ], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "stop_token_ids": [ + 151645, + 151643 + ], + "stop": [ + "", + "" + ] } ] diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index 997cbbdda1..18376efb9d 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -8072,5 +8072,60 @@ "<|im_end|>", "<|endoftext|>" ] + }, + { + "version": 1, + "context_length": 32768, + "model_name": "SeaLLMs-v3-7B-Chat", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat" + ], + "model_description": "We introduce SeaLLMs-v3, the latest series of the SeaLLMs (Large Language Models for Southeast Asian languages) family. ", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "QwenCollection/SeaLLMs-v3-7B-Chat", + "model_revision":"master", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "Q2_K", + "Q3_K_L", + "Q3_K_M", + "Q3_K_S", + "Q4_K_M", + "Q4_K_S", + "Q5_0", + "Q5_1", + "Q5_K_S" + ], + "model_id": "QuantFactory/SeaLLMs-v3-7B-Chat-GGUF", + "model_file_name_template": "SeaLLMs-v3-7B-Chat.{quantization}.gguf", + "model_revision":"master", + "model_hub": "modelscope" + } + ], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "stop_token_ids": [ + 151645, + 151643 + ], + "stop": [ + "", + "" + ] } ] From 3dd5876f6656929625056fbf093b15ae99b2e674 Mon Sep 17 00:00:00 2001 From: Example User Date: Mon, 3 Mar 2025 13:35:34 +0000 Subject: [PATCH 2/6] add --- xinference/core/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xinference/core/model.py b/xinference/core/model.py index aba3fd6bd3..5a78b18fb3 100644 --- a/xinference/core/model.py +++ b/xinference/core/model.py @@ -76,7 +76,6 @@ class _OutOfMemoryError(Exception): "cogvlm2", "glm-4v", "MiniCPM-V-2.6", - "MiniCPM-o-2_6", ] XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"] From cf2ca88a649a240a2dc806668854788bdc3ec92a Mon Sep 17 00:00:00 2001 From: Example User Date: Mon, 3 Mar 2025 14:40:05 +0000 Subject: [PATCH 3/6] add --- xinference/model/llm/llm_family.json | 32 ++++++++++++++++++ .../model/llm/llm_family_modelscope.json | 33 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 6d6a6f0d0d..6e31fd4083 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -10308,6 +10308,38 @@ "<|endoftext|>" ] }, + { + "version": 1, + "context_length": 8192, + "model_name": "moonlight-16b-a3b-instruct", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat" + ], + "model_description": "Kimi Muon is Scalable for LLM Training", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 3, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "moonshotai/Moonlight-16B-A3B-Instruct" + } + ], + "chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}", + "stop_token_ids": [ + 163586 + ], + "stop": [ + "<|im_end|>" + ] + }, { "version": 1, "context_length": 32768, diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index 18376efb9d..996db22343 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -8073,6 +8073,39 @@ "<|endoftext|>" ] }, + { + "version": 1, + "context_length": 8192, + "model_name": "moonlight-16b-a3b-instruct", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat" + ], + "model_description": "Kimi Muon is Scalable for LLM Training", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 3, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "moonshotai/Moonlight-16B-A3B-Instruct", + "model_hub": "modelscope" + } + ], + "chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}", + "stop_token_ids": [ + 163586 + ], + "stop": [ + "<|im_end|>" + ] + }, { "version": 1, "context_length": 32768, From a1c92bb59406d0c806605843b3df2224aed1b4ad Mon Sep 17 00:00:00 2001 From: boyu <108654889+shuaiqidezhong@users.noreply.github.com> Date: Tue, 4 Mar 2025 22:37:14 +0800 Subject: [PATCH 4/6] Update llm_family.json --- xinference/model/llm/llm_family.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 6e31fd4083..e3b044f9c9 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -10343,7 +10343,7 @@ { "version": 1, "context_length": 32768, - "model_name": "SeaLLMs-v3-7B-Chat", + "model_name": "SeaLLMs-v3-Chat", "model_lang": [ "en", "zh" From c3cf10bc30c23b1de9f79d98e58919425e3ff365 Mon Sep 17 00:00:00 2001 From: boyu <108654889+shuaiqidezhong@users.noreply.github.com> Date: Tue, 4 Mar 2025 22:38:40 +0800 Subject: [PATCH 5/6] Update llm_family_modelscope.json --- xinference/model/llm/llm_family_modelscope.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index 996db22343..d60800ea94 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -8109,7 +8109,7 @@ { "version": 1, "context_length": 32768, - "model_name": "SeaLLMs-v3-7B-Chat", + "model_name": "SeaLLMs-v3-Chat", "model_lang": [ "en", "zh" From 58cdd141b4ac680d696216284bc8150119f77140 Mon Sep 17 00:00:00 2001 From: boyu <108654889+shuaiqidezhong@users.noreply.github.com> Date: Thu, 6 Mar 2025 11:14:56 +0800 Subject: [PATCH 6/6] Update llm_family.json Change length --- xinference/model/llm/llm_family.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index e3b044f9c9..8fbd4c4f5c 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -10342,7 +10342,7 @@ }, { "version": 1, - "context_length": 32768, + "context_length": 131072, "model_name": "SeaLLMs-v3-Chat", "model_lang": [ "en",