From 332f205f301a5f6a5a54588c623379f5e4fd35e6 Mon Sep 17 00:00:00 2001
From: Example User <example@example.com>
Date: Mon, 3 Mar 2025 08:42:27 +0000
Subject: [PATCH 1/6] add model SeaLLMs-v3-7B-Chat

---
 xinference/core/model.py                      |  1 +
 xinference/model/llm/llm_family.json          | 51 +++++++++++++++++
 .../model/llm/llm_family_modelscope.json      | 55 +++++++++++++++++++
 3 files changed, 107 insertions(+)

diff --git a/xinference/core/model.py b/xinference/core/model.py
index 5a78b18fb3..aba3fd6bd3 100644
--- a/xinference/core/model.py
+++ b/xinference/core/model.py
@@ -76,6 +76,7 @@ class _OutOfMemoryError(Exception):
     "cogvlm2",
     "glm-4v",
     "MiniCPM-V-2.6",
+    "MiniCPM-o-2_6",
 ]
 
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]
diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index 3aebdd17e1..6d6a6f0d0d 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -10307,5 +10307,56 @@
       "<|im_end|>",
       "<|endoftext|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "SeaLLMs-v3-7B-Chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "We introduce SeaLLMs-v3, the latest series of the SeaLLMs (Large Language Models for Southeast Asian languages) family. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "SeaLLMs/SeaLLMs-v3-7B-Chat"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_S"
+        ],
+        "model_id": "tensorblock/SeaLLMs-v3-7B-Chat-GGUF",
+        "model_file_name_template": "SeaLLMs-v3-7B-Chat.{quantization}.gguf"
+      }
+    ],
+    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|im_start|>"
+    ]
   }
 ]
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index 997cbbdda1..18376efb9d 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -8072,5 +8072,60 @@
       "<|im_end|>",
       "<|endoftext|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "SeaLLMs-v3-7B-Chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "We introduce SeaLLMs-v3, the latest series of the SeaLLMs (Large Language Models for Southeast Asian languages) family. ",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "QwenCollection/SeaLLMs-v3-7B-Chat",
+        "model_revision":"master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_S"
+        ],
+        "model_id": "QuantFactory/SeaLLMs-v3-7B-Chat-GGUF",
+        "model_file_name_template": "SeaLLMs-v3-7B-Chat.{quantization}.gguf",
+        "model_revision":"master",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|im_start|>"
+    ]
   }
 ]

From 3dd5876f6656929625056fbf093b15ae99b2e674 Mon Sep 17 00:00:00 2001
From: Example User <example@example.com>
Date: Mon, 3 Mar 2025 13:35:34 +0000
Subject: [PATCH 2/6] revert MiniCPM-o-2_6 addition in core/model.py

---
 xinference/core/model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xinference/core/model.py b/xinference/core/model.py
index aba3fd6bd3..5a78b18fb3 100644
--- a/xinference/core/model.py
+++ b/xinference/core/model.py
@@ -76,7 +76,6 @@ class _OutOfMemoryError(Exception):
     "cogvlm2",
     "glm-4v",
     "MiniCPM-V-2.6",
-    "MiniCPM-o-2_6",
 ]
 
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_ALLOWED_MODELS = ["FLUX.1-dev", "FLUX.1-schnell"]

From cf2ca88a649a240a2dc806668854788bdc3ec92a Mon Sep 17 00:00:00 2001
From: Example User <example@example.com>
Date: Mon, 3 Mar 2025 14:40:05 +0000
Subject: [PATCH 3/6] add model moonlight-16b-a3b-instruct

---
 xinference/model/llm/llm_family.json          | 32 ++++++++++++++++++
 .../model/llm/llm_family_modelscope.json      | 33 +++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index 6d6a6f0d0d..6e31fd4083 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -10308,6 +10308,38 @@
       "<|endoftext|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "moonlight-16b-a3b-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Kimi Muon is Scalable for LLM Training",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "moonshotai/Moonlight-16B-A3B-Instruct"
+      }
+    ],
+    "chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
+    "stop_token_ids": [
+      163586
+    ],
+    "stop": [
+      "<|im_end|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index 18376efb9d..996db22343 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -8073,6 +8073,39 @@
       "<|endoftext|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "moonlight-16b-a3b-instruct",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Kimi Muon is Scalable for LLM Training",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "moonshotai/Moonlight-16B-A3B-Instruct",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template":"{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>{%- endif -%}{%- if message['role'] == 'system' -%}<|im_system|>{%- endif -%}{%- if message['role'] == 'user' -%}<|im_user|>{%- endif -%}{%- if message['role'] == 'assistant' -%}<|im_assistant|>{%- endif -%}{{ message['role'] }}<|im_middle|>{{message['content']}}<|im_end|>{%- endfor -%}{%- if add_generation_prompt -%}<|im_assistant|>assistant<|im_middle|>{%- endif -%}",
+    "stop_token_ids": [
+      163586
+    ],
+    "stop": [
+      "<|im_end|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,

From a1c92bb59406d0c806605843b3df2224aed1b4ad Mon Sep 17 00:00:00 2001
From: boyu <108654889+shuaiqidezhong@users.noreply.github.com>
Date: Tue, 4 Mar 2025 22:37:14 +0800
Subject: [PATCH 4/6] Update llm_family.json

---
 xinference/model/llm/llm_family.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index 6e31fd4083..e3b044f9c9 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -10343,7 +10343,7 @@
   {
     "version": 1,
     "context_length": 32768,
-    "model_name": "SeaLLMs-v3-7B-Chat",
+    "model_name": "SeaLLMs-v3-Chat",
     "model_lang": [
       "en",
       "zh"

From c3cf10bc30c23b1de9f79d98e58919425e3ff365 Mon Sep 17 00:00:00 2001
From: boyu <108654889+shuaiqidezhong@users.noreply.github.com>
Date: Tue, 4 Mar 2025 22:38:40 +0800
Subject: [PATCH 5/6] Update llm_family_modelscope.json

---
 xinference/model/llm/llm_family_modelscope.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index 996db22343..d60800ea94 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -8109,7 +8109,7 @@
   {
     "version": 1,
     "context_length": 32768,
-    "model_name": "SeaLLMs-v3-7B-Chat",
+    "model_name": "SeaLLMs-v3-Chat",
     "model_lang": [
       "en",
       "zh"

From 58cdd141b4ac680d696216284bc8150119f77140 Mon Sep 17 00:00:00 2001
From: boyu <108654889+shuaiqidezhong@users.noreply.github.com>
Date: Thu, 6 Mar 2025 11:14:56 +0800
Subject: [PATCH 6/6] Update llm_family.json

Raise SeaLLMs-v3-Chat context_length from 32768 to 131072
---
 xinference/model/llm/llm_family.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index e3b044f9c9..8fbd4c4f5c 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -10342,7 +10342,7 @@
   },
   {
     "version": 1,
-    "context_length": 32768,
+    "context_length": 131072,
     "model_name": "SeaLLMs-v3-Chat",
     "model_lang": [
       "en",