From 7ea1e4b1fd15e7d94ab3a6de9071fcfb60680cf9 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 9 Oct 2024 03:56:11 +0000 Subject: [PATCH 1/3] fix: model path, model name, update third_party --- examples/demo_smollm.cpp | 8 ++--- src/backends/xnnpack/third_party/XNNPACK | 2 +- src/models/smollm/configuration_smollm.hpp | 16 ++++----- src/models/smollm/modeling_smollm.hpp | 38 +++++++++++----------- src/models/smollm/tokenization_smollm.hpp | 14 ++++---- third_party/pybind11 | 2 +- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/examples/demo_smollm.cpp b/examples/demo_smollm.cpp index 1ad41dbf..a15725c8 100644 --- a/examples/demo_smollm.cpp +++ b/examples/demo_smollm.cpp @@ -21,7 +21,7 @@ int main(int argc, char **argv) { cmdline::parser cmdParser; cmdParser.add("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/smollm_vocab.mllm"); cmdParser.add("merge", 'e', "specify mllm merge file path", false, "../vocab/smollm_merges.txt"); - cmdParser.add("model", 'm', "specify mllm model path", false, "../models/SmoLlm-1.7B-q4_0x4.mllm"); + cmdParser.add("model", 'm', "specify mllm model path", false, "../models/SmolLM-1.7B-q4_0x4.mllm"); cmdParser.add("limits", 'l', "max KV cache size", false, 400); cmdParser.add("thread", 't', "num of threads", false, 4); cmdParser.parse_check(argc, argv); @@ -32,9 +32,9 @@ int main(int argc, char **argv) { int tokens_limit = cmdParser.get("limits"); CPUBackend::cpu_threads = cmdParser.get("thread"); - auto tokenizer = SmoLlmTokenizer(vocab_path, merge_path); - SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); - auto model = SmoLlmModel(config); + auto tokenizer = SmolLMTokenizer(vocab_path, merge_path); + SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); + auto model = SmolLMModel(config); model.load(model_path); vector in_strs = { diff --git a/src/backends/xnnpack/third_party/XNNPACK b/src/backends/xnnpack/third_party/XNNPACK index 
331e1074..93032eaa 160000 --- a/src/backends/xnnpack/third_party/XNNPACK +++ b/src/backends/xnnpack/third_party/XNNPACK @@ -1 +1 @@ -Subproject commit 331e10744ffd05bbd51d310c99274e646692c079 +Subproject commit 93032eaa5f7df99d3fb5cbcf5acb862e3e09c270 diff --git a/src/models/smollm/configuration_smollm.hpp b/src/models/smollm/configuration_smollm.hpp index a0e460af..37f8794f 100644 --- a/src/models/smollm/configuration_smollm.hpp +++ b/src/models/smollm/configuration_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file configuration_smollm.hpp + * @file configuration_SmolLM.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,14 +8,14 @@ * */ #pragma once -#ifndef CONFIG_SMOLLM_HPP -#define CONFIG_SMOLLM_HPP +#ifndef CONFIG_SmolLM_HPP +#define CONFIG_SmolLM_HPP #include "Types.hpp" #include "models/transformer/configuration_transformer.hpp" using namespace mllm; -class SmoLlMNameConfig : public TransformerNameConfig { +class SmolLMNameConfig : public TransformerNameConfig { public: std::string blk_name; std::string token_embd_name; @@ -68,7 +68,7 @@ class SmoLlMNameConfig : public TransformerNameConfig { } }; -class SmoLlmConfig { +class SmolLMConfig { public: int vocab_size{}; int hidden_dim{}; @@ -78,11 +78,11 @@ class SmoLlmConfig { int block_num{}; RoPEType RoPE_type; int cache_limit{}; - SmoLlMNameConfig names_config; + SmolLMNameConfig names_config; float rope_theta; int max_position_embeddings; - explicit SmoLlmConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) { + explicit SmolLMConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) { names_config.init(type); vocab_size = vocab; if (billions == "1.7B" || billions == "1.7b") { @@ -109,4 +109,4 @@ class SmoLlmConfig { } }; -#endif // CONFIG_SMOLLM_HPP \ No newline at end of file +#endif // CONFIG_SmolLM_HPP \ No newline at end of file diff --git a/src/models/smollm/modeling_smollm.hpp 
b/src/models/smollm/modeling_smollm.hpp index e1686bb5..7bd25030 100644 --- a/src/models/smollm/modeling_smollm.hpp +++ b/src/models/smollm/modeling_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_smollm.hpp + * @file tokenization_SmolLM.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,8 +8,8 @@ * */ #pragma once -#ifndef MODELING_SMOLLM_HPP -#define MODELING_SMOLLM_HPP +#ifndef MODELING_SmolLM_HPP +#define MODELING_SmolLM_HPP #include "Layer.hpp" #include "Module.hpp" @@ -18,15 +18,15 @@ using namespace mllm; -class SmoLlmMLP final : public Module { +class SmolLMMLP final : public Module { Layer gate_proj; Layer silu; Layer up_proj; Layer down_proj; public: - SmoLlmMLP() = default; - SmoLlmMLP(int hidden_dim, int ffn_hidden, const SmoLlMNameConfig &names, const string &base_name) { + SmolLMMLP() = default; + SmolLMMLP(int hidden_dim, int ffn_hidden, const SmolLMNameConfig &names, const string &base_name) { gate_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._gate_proj_name); silu = SiLU(base_name + "act"); up_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name); @@ -42,18 +42,18 @@ class SmoLlmMLP final : public Module { } }; -class SmoLlmBlock final : public Module { +class SmolLMBlock final : public Module { MultiHeadAttention attention; - SmoLlmMLP mlp; + SmolLMMLP mlp; Layer norm1; Layer norm2; public: - SmoLlmBlock() = default; - SmoLlmBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmoLlMNameConfig &names, const string &base_name) { + SmolLMBlock() = default; + SmolLMBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmolLMNameConfig &names, const string &base_name) { attention = MultiHeadAttention(hidden_dim, head_size, kv_head_size, hidden_dim 
/ head_size, SPLIT_NONE, false, false, RoPE_type, rope_theta, max_position_embeddings, cache_limit, true, false, names, base_name + names._attn_base_name); - mlp = SmoLlmMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name); + mlp = SmolLMMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name); norm1 = RMSNorm(hidden_dim, 1e-6, base_name + names._attn_norm_name); norm2 = RMSNorm(hidden_dim, 1e-6, base_name + names._ffn_norm_name); } @@ -72,22 +72,22 @@ class SmoLlmBlock final : public Module { } }; -class SmoLlmModel final : public Module { +class SmolLMModel final : public Module { Layer embedding; - vector blocks; + vector blocks; Layer norm; Parameter lm_head; public: - explicit SmoLlmModel(const SmoLlmConfig &config) : - SmoLlmModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num, + explicit SmolLMModel(const SmolLMConfig &config) : + SmolLMModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num, config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit, config.names_config, config.names_config.blk_name) { } - SmoLlmModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, - const SmoLlMNameConfig &names, const string &base_name) { + SmolLMModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, + const SmolLMNameConfig &names, const string &base_name) { embedding = Embedding(vocab_size, hidden_dim, names.token_embd_name); - blocks = List(block_num, hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name); + blocks = List(block_num, 
hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name); norm = RMSNorm(hidden_dim, 1e-6, names.post_norm_name); lm_head = Parameter(1, vocab_size, 1, hidden_dim, names.token_embd_name + ".weight"); @@ -112,4 +112,4 @@ class SmoLlmModel final : public Module { } }; -#endif // MODELING_SMOLLM_HPP \ No newline at end of file +#endif // MODELING_SmolLM_HPP \ No newline at end of file diff --git a/src/models/smollm/tokenization_smollm.hpp b/src/models/smollm/tokenization_smollm.hpp index 086581ba..f81c8e8d 100644 --- a/src/models/smollm/tokenization_smollm.hpp +++ b/src/models/smollm/tokenization_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_smollm.hpp + * @file tokenization_SmolLM.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @brief * @version 0.1 @@ -8,8 +8,8 @@ * @copyright Copyright (c) 2024 * */ -#ifndef DCLMTOKENIZATION_SMOLLM_HPP -#define DCLMTOKENIZATION_SMOLLM_HPP +#ifndef DCLMTOKENIZATION_SmolLM_HPP +#define DCLMTOKENIZATION_SmolLM_HPP #include "tokenizers/BPE/Bpe.hpp" #include "tokenizers/Tokenizer.hpp" @@ -52,9 +52,9 @@ static const std::vector FIXED_PAT_STRS = { "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", }; -class SmoLlmTokenizer final { +class SmolLMTokenizer final { public: - explicit SmoLlmTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) : + explicit SmolLMTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) : split_special_tokens_(split_special_tokens) { Module::initBackend(MLLM_CPU); tokenizer = new BPETokenizer(vocab_file); @@ -100,7 +100,7 @@ class SmoLlmTokenizer final { tokenizer->setMergeRank(bpe_ranks_); } - ~SmoLlmTokenizer() { + ~SmolLMTokenizer() { delete tokenizer; } @@ -323,4 +323,4 @@ class SmoLlmTokenizer final { #undef 
CHR #undef ORD -#endif //! DCLMTOKENIZATION_SMOLLM_HPP \ No newline at end of file +#endif //! DCLMTOKENIZATION_SmolLM_HPP \ No newline at end of file diff --git a/third_party/pybind11 b/third_party/pybind11 index ad9fd39e..af67e873 160000 --- a/third_party/pybind11 +++ b/third_party/pybind11 @@ -1 +1 @@ -Subproject commit ad9fd39e143c8296a49a1b5b258cb6aa24e23889 +Subproject commit af67e87393b0f867ccffc2702885eea12de063fc From 44b4219866c1635d17f8da70e77555359a0483b9 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 9 Oct 2024 04:04:37 +0000 Subject: [PATCH 2/3] fix: rename SmolLM --- examples/benchmark.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/benchmark.cpp b/examples/benchmark.cpp index 46835728..64939c2f 100644 --- a/examples/benchmark.cpp +++ b/examples/benchmark.cpp @@ -142,8 +142,8 @@ int main(int argc, char **argv) { } model.profiling(); } else if (model_name == "smollm-360M") { - SmoLlmConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152); - auto model = SmoLlmModel(config); + SmolLMConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152); + auto model = SmolLMModel(config); model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4); auto input_tensor = tokens2Input(input_size); @@ -153,8 +153,8 @@ int main(int argc, char **argv) { } model.profiling(); } else if (model_name == "smollm-1.7B") { - SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); - auto model = SmoLlmModel(config); + SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); + auto model = SmolLMModel(config); model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4); auto input_tensor = tokens2Input(input_size); From 13139d196a671ca750186a574776eea162d14e09 Mon Sep 17 00:00:00 2001 From: yirongjie Date: Wed, 9 Oct 2024 05:12:45 +0000 Subject: [PATCH 3/3] fix: rename SmolLM model file --- README.md | 1 + examples/demo_smollm.cpp | 2 +- src/models/smollm/configuration_smollm.hpp | 8 ++++---- 
src/models/smollm/modeling_smollm.hpp | 8 ++++---- src/models/smollm/tokenization_smollm.hpp | 8 ++++---- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 13ca6c1d..7c784e19 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ Wait.. why on-device multimodal LLM? - It's a key building block for [intelligen | [OPT 1.3B](https://github.com/facebookresearch/metaseq/tree/main/projects/OPT) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | | | [Phi-3-mini 3.8B](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | | | [MiniCPM 2B](https://huggingface.co/openbmb/MiniCPM-2B-dpo-fp32) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | | +| [SmolLM 1.7B](https://huggingface.co/HuggingFaceTB/SmolLM-1.7B-Instruct) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | | ## Quick Start diff --git a/examples/demo_smollm.cpp b/examples/demo_smollm.cpp index a15725c8..a2ca94f3 100644 --- a/examples/demo_smollm.cpp +++ b/examples/demo_smollm.cpp @@ -21,7 +21,7 @@ int main(int argc, char **argv) { cmdline::parser cmdParser; cmdParser.add("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/smollm_vocab.mllm"); cmdParser.add("merge", 'e', "specify mllm merge file path", false, "../vocab/smollm_merges.txt"); - cmdParser.add("model", 'm', "specify mllm model path", false, "../models/SmolLM-1.7B-q4_0x4.mllm"); + cmdParser.add("model", 'm', "specify mllm model path", false, "../models/smollm-1.7b-instruct-q4_0_4_4.mllm"); cmdParser.add("limits", 'l', "max KV cache size", false, 400); cmdParser.add("thread", 
't', "num of threads", false, 4); cmdParser.parse_check(argc, argv); diff --git a/src/models/smollm/configuration_smollm.hpp b/src/models/smollm/configuration_smollm.hpp index 37f8794f..d9e2b490 100644 --- a/src/models/smollm/configuration_smollm.hpp +++ b/src/models/smollm/configuration_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file configuration_SmolLM.hpp + * @file configuration_smollm.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,8 +8,8 @@ * */ #pragma once -#ifndef CONFIG_SmolLM_HPP -#define CONFIG_SmolLM_HPP +#ifndef CONFIG_SMOLLM_HPP +#define CONFIG_SMOLLM_HPP #include "Types.hpp" #include "models/transformer/configuration_transformer.hpp" @@ -109,4 +109,4 @@ class SmolLMConfig { } }; -#endif // CONFIG_SmolLM_HPP \ No newline at end of file +#endif // CONFIG_SMOLLM_HPP \ No newline at end of file diff --git a/src/models/smollm/modeling_smollm.hpp b/src/models/smollm/modeling_smollm.hpp index 7bd25030..2dd94d72 100644 --- a/src/models/smollm/modeling_smollm.hpp +++ b/src/models/smollm/modeling_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_SmolLM.hpp + * @file modeling_smollm.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,8 +8,8 @@ * */ #pragma once -#ifndef MODELING_SmolLM_HPP -#define MODELING_SmolLM_HPP +#ifndef MODELING_SMOLLM_HPP +#define MODELING_SMOLLM_HPP #include "Layer.hpp" #include "Module.hpp" @@ -112,4 +112,4 @@ class SmolLMModel final : public Module { } }; -#endif // MODELING_SmolLM_HPP \ No newline at end of file +#endif // MODELING_SMOLLM_HPP \ No newline at end of file diff --git a/src/models/smollm/tokenization_smollm.hpp b/src/models/smollm/tokenization_smollm.hpp index f81c8e8d..0840e844 100644 --- a/src/models/smollm/tokenization_smollm.hpp +++ b/src/models/smollm/tokenization_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_SmolLM.hpp + * @file tokenization_smollm.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * 
@brief * @version 0.1 @@ -8,8 +8,8 @@ * @copyright Copyright (c) 2024 * */ -#ifndef DCLMTOKENIZATION_SmolLM_HPP -#define DCLMTOKENIZATION_SmolLM_HPP +#ifndef DCLMTOKENIZATION_SMOLLM_HPP +#define DCLMTOKENIZATION_SMOLLM_HPP #include "tokenizers/BPE/Bpe.hpp" #include "tokenizers/Tokenizer.hpp" @@ -323,4 +323,4 @@ class SmolLMTokenizer final { #undef CHR #undef ORD -#endif //! DCLMTOKENIZATION_SmolLM_HPP \ No newline at end of file +#endif // DCLMTOKENIZATION_SMOLLM_HPP \ No newline at end of file