Merge pull request #157 from chenghuaWang/main
fix: SmolLM name
yirongjie authored Oct 9, 2024
2 parents d6ccf32 + 13139d1 commit 245871b
Showing 8 changed files with 34 additions and 33 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -100,6 +100,7 @@ Wait.. why on-device multimodal LLM? - It's a key building block for [intelligen
| [OPT 1.3B](https://github.com/facebookresearch/metaseq/tree/main/projects/OPT) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | |
| [Phi-3-mini 3.8B](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | |
| [MiniCPM 2B](https://huggingface.co/openbmb/MiniCPM-2B-dpo-fp32) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | |
+| [SmolLM 1.7B](https://huggingface.co/HuggingFaceTB/SmolLM-1.7B-Instruct) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | |

## Quick Start

8 changes: 4 additions & 4 deletions examples/benchmark.cpp
@@ -142,8 +142,8 @@ int main(int argc, char **argv) {
}
model.profiling();
} else if (model_name == "smollm-360M") {
-SmoLlmConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152);
-auto model = SmoLlmModel(config);
+SmolLMConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152);
+auto model = SmolLMModel(config);
model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4);

auto input_tensor = tokens2Input(input_size);
@@ -153,8 +153,8 @@ int main(int argc, char **argv) {
}
model.profiling();
} else if (model_name == "smollm-1.7B") {
-SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
-auto model = SmoLlmModel(config);
+SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
+auto model = SmolLMModel(config);
model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4);

auto input_tensor = tokens2Input(input_size);
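For reference, the renamed benchmark setup in one place — a minimal sketch following the hunks above (the include paths and the free-function wrapper are assumptions for illustration, not part of the commit):

```cpp
// Sketch of the renamed classes, mirroring examples/benchmark.cpp.
// Include paths assumed from the repository layout; input construction
// (e.g. tokens2Input) and the run loop are elided, as in the hunks above.
#include <string>
#include "models/smollm/configuration_smollm.hpp"
#include "models/smollm/modeling_smollm.hpp"

void benchmarkSmolLM(int tokens_limit, const std::string &billions) {
    // Both sizes ("360M" and "1.7B") share HF-hub RoPE and a 49152-token vocab.
    SmolLMConfig config(tokens_limit, billions, RoPEType::HFHUBROPE, 49152);
    auto model = SmolLMModel(config);
    // Same no-load weight dtype as the benchmark uses.
    model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4);
    // ... build input tensors and run forward passes, then:
    model.profiling();
}
```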
8 changes: 4 additions & 4 deletions examples/demo_smollm.cpp
@@ -21,7 +21,7 @@ int main(int argc, char **argv) {
cmdline::parser cmdParser;
cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/smollm_vocab.mllm");
cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/smollm_merges.txt");
cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/SmoLlm-1.7B-q4_0x4.mllm");
cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/smollm-1.7b-instruct-q4_0_4_4.mllm");
cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
cmdParser.add<int>("thread", 't', "num of threads", false, 4);
cmdParser.parse_check(argc, argv);
@@ -32,9 +32,9 @@ int main(int argc, char **argv) {
int tokens_limit = cmdParser.get<int>("limits");
CPUBackend::cpu_threads = cmdParser.get<int>("thread");

-auto tokenizer = SmoLlmTokenizer(vocab_path, merge_path);
-SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
-auto model = SmoLlmModel(config);
+auto tokenizer = SmolLMTokenizer(vocab_path, merge_path);
+SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
+auto model = SmolLMModel(config);
model.load(model_path);

vector<string> in_strs = {
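End to end, the renamed demo flow reads as the sketch below (mirroring examples/demo_smollm.cpp; the paths are the demo's command-line defaults and the generation loop is elided):

```cpp
// Sketch of the demo flow after the rename, following examples/demo_smollm.cpp.
// Include paths assumed from the repository layout.
#include "models/smollm/configuration_smollm.hpp"
#include "models/smollm/modeling_smollm.hpp"
#include "models/smollm/tokenization_smollm.hpp"

int main() {
    // Defaults taken from the demo's command-line options.
    auto tokenizer = SmolLMTokenizer("../vocab/smollm_vocab.mllm",
                                     "../vocab/smollm_merges.txt");
    SmolLMConfig config(/*token_limit=*/400, "1.7B", RoPEType::HFHUBROPE, 49152);
    auto model = SmolLMModel(config);
    model.load("../models/smollm-1.7b-instruct-q4_0_4_4.mllm");
    // ... tokenize each prompt in in_strs and generate, as in the demo ...
    return 0;
}
```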
2 changes: 1 addition & 1 deletion src/backends/xnnpack/third_party/XNNPACK
Submodule XNNPACK updated 292 files
8 changes: 4 additions & 4 deletions src/models/smollm/configuration_smollm.hpp
@@ -15,7 +15,7 @@

using namespace mllm;

-class SmoLlMNameConfig : public TransformerNameConfig {
+class SmolLMNameConfig : public TransformerNameConfig {
public:
std::string blk_name;
std::string token_embd_name;
@@ -68,7 +68,7 @@ class SmoLlMNameConfig : public TransformerNameConfig {
}
};

-class SmoLlmConfig {
+class SmolLMConfig {
public:
int vocab_size{};
int hidden_dim{};
@@ -78,11 +78,11 @@ class SmoLlmConfig {
int block_num{};
RoPEType RoPE_type;
int cache_limit{};
-SmoLlMNameConfig names_config;
+SmolLMNameConfig names_config;
float rope_theta;
int max_position_embeddings;

-explicit SmoLlmConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) {
+explicit SmolLMConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) {
names_config.init(type);
vocab_size = vocab;
if (billions == "1.7B" || billions == "1.7b") {
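Worth noting: the constructor's default vocab is 32000, which does not match SmolLM's 49152-entry vocabulary, so every caller in the examples passes it explicitly. A two-line sketch:

```cpp
// Sketch: both published sizes are selected by the "billions" string;
// the vocab size is passed explicitly because the default (32000) differs.
SmolLMConfig small(/*token_limit=*/400, "360M", RoPEType::HFHUBROPE, 49152);
SmolLMConfig large(/*token_limit=*/400, "1.7B", RoPEType::HFHUBROPE, 49152);
```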
30 changes: 15 additions & 15 deletions src/models/smollm/modeling_smollm.hpp
@@ -18,15 +18,15 @@

using namespace mllm;

-class SmoLlmMLP final : public Module {
+class SmolLMMLP final : public Module {
Layer gate_proj;
Layer silu;
Layer up_proj;
Layer down_proj;

public:
-SmoLlmMLP() = default;
-SmoLlmMLP(int hidden_dim, int ffn_hidden, const SmoLlMNameConfig &names, const string &base_name) {
+SmolLMMLP() = default;
+SmolLMMLP(int hidden_dim, int ffn_hidden, const SmolLMNameConfig &names, const string &base_name) {
gate_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._gate_proj_name);
silu = SiLU(base_name + "act");
up_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name);
@@ -42,18 +42,18 @@ class SmoLlmMLP final : public Module {
}
};

-class SmoLlmBlock final : public Module {
+class SmolLMBlock final : public Module {
MultiHeadAttention attention;
-SmoLlmMLP mlp;
+SmolLMMLP mlp;
Layer norm1;
Layer norm2;

public:
-SmoLlmBlock() = default;
-SmoLlmBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmoLlMNameConfig &names, const string &base_name) {
+SmolLMBlock() = default;
+SmolLMBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmolLMNameConfig &names, const string &base_name) {
attention = MultiHeadAttention(hidden_dim, head_size, kv_head_size, hidden_dim / head_size, SPLIT_NONE, false, false,
RoPE_type, rope_theta, max_position_embeddings, cache_limit, true, false, names, base_name + names._attn_base_name);
-mlp = SmoLlmMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
+mlp = SmolLMMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
norm1 = RMSNorm(hidden_dim, 1e-6, base_name + names._attn_norm_name);
norm2 = RMSNorm(hidden_dim, 1e-6, base_name + names._ffn_norm_name);
}
@@ -72,22 +72,22 @@ class SmoLlmBlock final : public Module {
}
};

-class SmoLlmModel final : public Module {
+class SmolLMModel final : public Module {
Layer embedding;
-vector<SmoLlmBlock> blocks;
+vector<SmolLMBlock> blocks;
Layer norm;
Parameter lm_head;

public:
-explicit SmoLlmModel(const SmoLlmConfig &config) :
-SmoLlmModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num,
+explicit SmolLMModel(const SmolLMConfig &config) :
+SmolLMModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num,
config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit,
config.names_config, config.names_config.blk_name) {
}
-SmoLlmModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit,
-const SmoLlMNameConfig &names, const string &base_name) {
+SmolLMModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit,
+const SmolLMNameConfig &names, const string &base_name) {
embedding = Embedding(vocab_size, hidden_dim, names.token_embd_name);
-blocks = List<SmoLlmBlock>(block_num, hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name);
+blocks = List<SmolLMBlock>(block_num, hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name);
norm = RMSNorm(hidden_dim, 1e-6, names.post_norm_name);
lm_head = Parameter(1, vocab_size, 1, hidden_dim,
names.token_embd_name + ".weight");
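Taken together, the hunks in this file are a consistent rename across the whole module hierarchy; as declared above, the composition is:

```cpp
// Composition after the rename (all names as declared in modeling_smollm.hpp):
//
// SmolLMModel
//   embedding : Embedding                      // names.token_embd_name
//   blocks    : block_num x SmolLMBlock
//     attention : MultiHeadAttention           // RoPE + KV cache (cache_limit)
//     mlp       : SmolLMMLP                    // gate/up/down Linear + SiLU
//     norm1, norm2 : RMSNorm(hidden_dim, 1e-6)
//   norm      : RMSNorm                        // names.post_norm_name
//   lm_head   : Parameter(1, vocab_size, 1, hidden_dim)
//               // tied to names.token_embd_name + ".weight"
```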
8 changes: 4 additions & 4 deletions src/models/smollm/tokenization_smollm.hpp
@@ -52,9 +52,9 @@ static const std::vector<std::string> FIXED_PAT_STRS = {
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
};

-class SmoLlmTokenizer final {
+class SmolLMTokenizer final {
public:
-explicit SmoLlmTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) :
+explicit SmolLMTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) :
split_special_tokens_(split_special_tokens) {
Module::initBackend(MLLM_CPU);
tokenizer = new BPETokenizer(vocab_file);
@@ -100,7 +100,7 @@ class SmoLlmTokenizer final {
tokenizer->setMergeRank(bpe_ranks_);
}

-~SmoLlmTokenizer() {
+~SmolLMTokenizer() {
delete tokenizer;
}

@@ -323,4 +323,4 @@ class SmoLlmTokenizer final {
#undef CHR
#undef ORD

-#endif //! DCLMTOKENIZATION_SMOLLM_HPP
+#endif // TOKENIZATION_SMOLLM_HPP
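And the renamed tokenizer in use — a short sketch per this header (the third constructor argument is the only optional knob shown in these hunks; the paths are the demo's defaults):

```cpp
// Sketch: constructing the renamed tokenizer. split_special_tokens defaults
// to false, per the constructor signature above.
auto tokenizer = SmolLMTokenizer("../vocab/smollm_vocab.mllm",
                                 "../vocab/smollm_merges.txt",
                                 /*split_special_tokens=*/false);
```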
2 changes: 1 addition & 1 deletion third_party/pybind11
Submodule pybind11 updated 49 files
+5 −0 .github/workflows/ci.yml
+391 −0 docs/advanced/deadlock.md
+8 −1 docs/advanced/misc.rst
+3 −3 include/pybind11/cast.h
+15 −1 include/pybind11/detail/common.h
+3 −2 include/pybind11/detail/internals.h
+1 −1 include/pybind11/detail/type_caster_base.h
+4 −4 include/pybind11/eval.h
+2 −0 include/pybind11/gil_safe_call_once.h
+6 −8 include/pybind11/pybind11.h
+2 −2 include/pybind11/pytypes.h
+38 −2 include/pybind11/stl_bind.h
+2 −2 tests/conftest.py
+1 −0 tests/env.py
+7 −6 tests/requirements.txt
+4 −0 tests/test_buffers.py
+2 −2 tests/test_call_policies.cpp
+5 −0 tests/test_call_policies.py
+1 −1 tests/test_callbacks.cpp
+2 −0 tests/test_callbacks.py
+1 −1 tests/test_class.py
+11 −5 tests/test_cmake_build/CMakeLists.txt
+5 −1 tests/test_cpp_conduit.py
+2 −2 tests/test_custom_type_setup.py
+2 −0 tests/test_eigen_matrix.py
+3 −0 tests/test_eigen_tensor.py
+5 −3 tests/test_embed/CMakeLists.txt
+3 −0 tests/test_enum.py
+1 −1 tests/test_eval.py
+3 −0 tests/test_exceptions.py
+13 −0 tests/test_factory_constructors.py
+16 −0 tests/test_gil_scoped.py
+6 −0 tests/test_iostream.py
+26 −0 tests/test_kwargs_and_defaults.cpp
+13 −0 tests/test_kwargs_and_defaults.py
+7 −1 tests/test_methods_and_attributes.py
+5 −2 tests/test_modules.py
+15 −9 tests/test_multiple_inheritance.py
+2 −1 tests/test_numpy_array.py
+3 −0 tests/test_operator_overloading.py
+1 −1 tests/test_pickling.py
+7 −1 tests/test_pytypes.py
+14 −5 tests/test_sequences_and_iterators.py
+12 −0 tests/test_smart_ptr.py
+2 −0 tests/test_stl.py
+19 −0 tests/test_stl_binders.py
+2 −1 tests/test_type_caster_pyobject_ptr.cpp
+7 −2 tests/test_virtual_functions.py
+0 −4 tools/pybind11NewTools.cmake
