From 7ea1e4b1fd15e7d94ab3a6de9071fcfb60680cf9 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 9 Oct 2024 03:56:11 +0000 Subject: [PATCH 1/3] fix: model path, model name, update third_party --- examples/demo_smollm.cpp | 8 ++--- src/backends/xnnpack/third_party/XNNPACK | 2 +- src/models/smollm/configuration_smollm.hpp | 16 ++++----- src/models/smollm/modeling_smollm.hpp | 38 +++++++++++----------- src/models/smollm/tokenization_smollm.hpp | 14 ++++---- third_party/pybind11 | 2 +- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/examples/demo_smollm.cpp b/examples/demo_smollm.cpp index 1ad41dbf..a15725c8 100644 --- a/examples/demo_smollm.cpp +++ b/examples/demo_smollm.cpp @@ -21,7 +21,7 @@ int main(int argc, char **argv) { cmdline::parser cmdParser; cmdParser.add("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/smollm_vocab.mllm"); cmdParser.add("merge", 'e', "specify mllm merge file path", false, "../vocab/smollm_merges.txt"); - cmdParser.add("model", 'm', "specify mllm model path", false, "../models/SmoLlm-1.7B-q4_0x4.mllm"); + cmdParser.add("model", 'm', "specify mllm model path", false, "../models/SmolLM-1.7B-q4_0x4.mllm"); cmdParser.add("limits", 'l', "max KV cache size", false, 400); cmdParser.add("thread", 't', "num of threads", false, 4); cmdParser.parse_check(argc, argv); @@ -32,9 +32,9 @@ int main(int argc, char **argv) { int tokens_limit = cmdParser.get("limits"); CPUBackend::cpu_threads = cmdParser.get("thread"); - auto tokenizer = SmoLlmTokenizer(vocab_path, merge_path); - SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); - auto model = SmoLlmModel(config); + auto tokenizer = SmolLMTokenizer(vocab_path, merge_path); + SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); + auto model = SmolLMModel(config); model.load(model_path); vector in_strs = { diff --git a/src/backends/xnnpack/third_party/XNNPACK b/src/backends/xnnpack/third_party/XNNPACK index 
331e1074..93032eaa 160000 --- a/src/backends/xnnpack/third_party/XNNPACK +++ b/src/backends/xnnpack/third_party/XNNPACK @@ -1 +1 @@ -Subproject commit 331e10744ffd05bbd51d310c99274e646692c079 +Subproject commit 93032eaa5f7df99d3fb5cbcf5acb862e3e09c270 diff --git a/src/models/smollm/configuration_smollm.hpp b/src/models/smollm/configuration_smollm.hpp index a0e460af..37f8794f 100644 --- a/src/models/smollm/configuration_smollm.hpp +++ b/src/models/smollm/configuration_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file configuration_smollm.hpp + * @file configuration_SmolLM.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,14 +8,14 @@ * */ #pragma once -#ifndef CONFIG_SMOLLM_HPP -#define CONFIG_SMOLLM_HPP +#ifndef CONFIG_SmolLM_HPP +#define CONFIG_SmolLM_HPP #include "Types.hpp" #include "models/transformer/configuration_transformer.hpp" using namespace mllm; -class SmoLlMNameConfig : public TransformerNameConfig { +class SmolLMNameConfig : public TransformerNameConfig { public: std::string blk_name; std::string token_embd_name; @@ -68,7 +68,7 @@ class SmoLlMNameConfig : public TransformerNameConfig { } }; -class SmoLlmConfig { +class SmolLMConfig { public: int vocab_size{}; int hidden_dim{}; @@ -78,11 +78,11 @@ class SmoLlmConfig { int block_num{}; RoPEType RoPE_type; int cache_limit{}; - SmoLlMNameConfig names_config; + SmolLMNameConfig names_config; float rope_theta; int max_position_embeddings; - explicit SmoLlmConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) { + explicit SmolLMConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) { names_config.init(type); vocab_size = vocab; if (billions == "1.7B" || billions == "1.7b") { @@ -109,4 +109,4 @@ class SmoLlmConfig { } }; -#endif // CONFIG_SMOLLM_HPP \ No newline at end of file +#endif // CONFIG_SmolLM_HPP \ No newline at end of file diff --git a/src/models/smollm/modeling_smollm.hpp 
b/src/models/smollm/modeling_smollm.hpp index e1686bb5..7bd25030 100644 --- a/src/models/smollm/modeling_smollm.hpp +++ b/src/models/smollm/modeling_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_smollm.hpp + * @file tokenization_SmolLM.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,8 +8,8 @@ * */ #pragma once -#ifndef MODELING_SMOLLM_HPP -#define MODELING_SMOLLM_HPP +#ifndef MODELING_SmolLM_HPP +#define MODELING_SmolLM_HPP #include "Layer.hpp" #include "Module.hpp" @@ -18,15 +18,15 @@ using namespace mllm; -class SmoLlmMLP final : public Module { +class SmolLMMLP final : public Module { Layer gate_proj; Layer silu; Layer up_proj; Layer down_proj; public: - SmoLlmMLP() = default; - SmoLlmMLP(int hidden_dim, int ffn_hidden, const SmoLlMNameConfig &names, const string &base_name) { + SmolLMMLP() = default; + SmolLMMLP(int hidden_dim, int ffn_hidden, const SmolLMNameConfig &names, const string &base_name) { gate_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._gate_proj_name); silu = SiLU(base_name + "act"); up_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name); @@ -42,18 +42,18 @@ class SmoLlmMLP final : public Module { } }; -class SmoLlmBlock final : public Module { +class SmolLMBlock final : public Module { MultiHeadAttention attention; - SmoLlmMLP mlp; + SmolLMMLP mlp; Layer norm1; Layer norm2; public: - SmoLlmBlock() = default; - SmoLlmBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmoLlMNameConfig &names, const string &base_name) { + SmolLMBlock() = default; + SmolLMBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmolLMNameConfig &names, const string &base_name) { attention = MultiHeadAttention(hidden_dim, head_size, kv_head_size, hidden_dim 
/ head_size, SPLIT_NONE, false, false, RoPE_type, rope_theta, max_position_embeddings, cache_limit, true, false, names, base_name + names._attn_base_name); - mlp = SmoLlmMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name); + mlp = SmolLMMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name); norm1 = RMSNorm(hidden_dim, 1e-6, base_name + names._attn_norm_name); norm2 = RMSNorm(hidden_dim, 1e-6, base_name + names._ffn_norm_name); } @@ -72,22 +72,22 @@ class SmoLlmBlock final : public Module { } }; -class SmoLlmModel final : public Module { +class SmolLMModel final : public Module { Layer embedding; - vector blocks; + vector blocks; Layer norm; Parameter lm_head; public: - explicit SmoLlmModel(const SmoLlmConfig &config) : - SmoLlmModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num, + explicit SmolLMModel(const SmolLMConfig &config) : + SmolLMModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num, config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit, config.names_config, config.names_config.blk_name) { } - SmoLlmModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, - const SmoLlMNameConfig &names, const string &base_name) { + SmolLMModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, + const SmolLMNameConfig &names, const string &base_name) { embedding = Embedding(vocab_size, hidden_dim, names.token_embd_name); - blocks = List(block_num, hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name); + blocks = List(block_num, 
hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name); norm = RMSNorm(hidden_dim, 1e-6, names.post_norm_name); lm_head = Parameter(1, vocab_size, 1, hidden_dim, names.token_embd_name + ".weight"); @@ -112,4 +112,4 @@ class SmoLlmModel final : public Module { } }; -#endif // MODELING_SMOLLM_HPP \ No newline at end of file +#endif // MODELING_SmolLM_HPP \ No newline at end of file diff --git a/src/models/smollm/tokenization_smollm.hpp b/src/models/smollm/tokenization_smollm.hpp index 086581ba..f81c8e8d 100644 --- a/src/models/smollm/tokenization_smollm.hpp +++ b/src/models/smollm/tokenization_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_smollm.hpp + * @file tokenization_SmolLM.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @brief * @version 0.1 @@ -8,8 +8,8 @@ * @copyright Copyright (c) 2024 * */ -#ifndef DCLMTOKENIZATION_SMOLLM_HPP -#define DCLMTOKENIZATION_SMOLLM_HPP +#ifndef DCLMTOKENIZATION_SmolLM_HPP +#define DCLMTOKENIZATION_SmolLM_HPP #include "tokenizers/BPE/Bpe.hpp" #include "tokenizers/Tokenizer.hpp" @@ -52,9 +52,9 @@ static const std::vector FIXED_PAT_STRS = { "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", }; -class SmoLlmTokenizer final { +class SmolLMTokenizer final { public: - explicit SmoLlmTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) : + explicit SmolLMTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) : split_special_tokens_(split_special_tokens) { Module::initBackend(MLLM_CPU); tokenizer = new BPETokenizer(vocab_file); @@ -100,7 +100,7 @@ class SmoLlmTokenizer final { tokenizer->setMergeRank(bpe_ranks_); } - ~SmoLlmTokenizer() { + ~SmolLMTokenizer() { delete tokenizer; } @@ -323,4 +323,4 @@ class SmoLlmTokenizer final { #undef 
CHR #undef ORD -#endif //! DCLMTOKENIZATION_SMOLLM_HPP \ No newline at end of file +#endif //! DCLMTOKENIZATION_SmolLM_HPP \ No newline at end of file diff --git a/third_party/pybind11 b/third_party/pybind11 index ad9fd39e..af67e873 160000 --- a/third_party/pybind11 +++ b/third_party/pybind11 @@ -1 +1 @@ -Subproject commit ad9fd39e143c8296a49a1b5b258cb6aa24e23889 +Subproject commit af67e87393b0f867ccffc2702885eea12de063fc From 44b4219866c1635d17f8da70e77555359a0483b9 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 9 Oct 2024 04:04:37 +0000 Subject: [PATCH 2/3] fix: rename SmolLM --- examples/benchmark.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/benchmark.cpp b/examples/benchmark.cpp index 46835728..64939c2f 100644 --- a/examples/benchmark.cpp +++ b/examples/benchmark.cpp @@ -142,8 +142,8 @@ int main(int argc, char **argv) { } model.profiling(); } else if (model_name == "smollm-360M") { - SmoLlmConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152); - auto model = SmoLlmModel(config); + SmolLMConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152); + auto model = SmolLMModel(config); model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4); auto input_tensor = tokens2Input(input_size); @@ -153,8 +153,8 @@ int main(int argc, char **argv) { } model.profiling(); } else if (model_name == "smollm-1.7B") { - SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); - auto model = SmoLlmModel(config); + SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152); + auto model = SmolLMModel(config); model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4); auto input_tensor = tokens2Input(input_size); From 13139d196a671ca750186a574776eea162d14e09 Mon Sep 17 00:00:00 2001 From: yirongjie Date: Wed, 9 Oct 2024 05:12:45 +0000 Subject: [PATCH 3/3] fix: rename SmolLM model file --- README.md | 1 + examples/demo_smollm.cpp | 2 +- src/models/smollm/configuration_smollm.hpp | 8 ++++---- 
src/models/smollm/modeling_smollm.hpp | 8 ++++---- src/models/smollm/tokenization_smollm.hpp | 8 ++++---- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 13ca6c1d..7c784e19 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ Wait.. why on-device multimodal LLM? - It's a key building block for [intelligen | [OPT 1.3B](https://github.com/facebookresearch/metaseq/tree/main/projects/OPT) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | | | [Phi-3-mini 3.8B](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | | | [MiniCPM 2B](https://huggingface.co/openbmb/MiniCPM-2B-dpo-fp32) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | | +| [SmolLM 1.7B](https://huggingface.co/HuggingFaceTB/SmolLM-1.7B-Instruct) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | | ## Quick Start diff --git a/examples/demo_smollm.cpp b/examples/demo_smollm.cpp index a15725c8..a2ca94f3 100644 --- a/examples/demo_smollm.cpp +++ b/examples/demo_smollm.cpp @@ -21,7 +21,7 @@ int main(int argc, char **argv) { cmdline::parser cmdParser; cmdParser.add("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/smollm_vocab.mllm"); cmdParser.add("merge", 'e', "specify mllm merge file path", false, "../vocab/smollm_merges.txt"); - cmdParser.add("model", 'm', "specify mllm model path", false, "../models/SmolLM-1.7B-q4_0x4.mllm"); + cmdParser.add("model", 'm', "specify mllm model path", false, "../models/smollm-1.7b-instruct-q4_0_4_4.mllm"); cmdParser.add("limits", 'l', "max KV cache size", false, 400); cmdParser.add("thread", 
't', "num of threads", false, 4); cmdParser.parse_check(argc, argv); diff --git a/src/models/smollm/configuration_smollm.hpp b/src/models/smollm/configuration_smollm.hpp index 37f8794f..d9e2b490 100644 --- a/src/models/smollm/configuration_smollm.hpp +++ b/src/models/smollm/configuration_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file configuration_SmolLM.hpp + * @file configuration_smollm.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,8 +8,8 @@ * */ #pragma once -#ifndef CONFIG_SmolLM_HPP -#define CONFIG_SmolLM_HPP +#ifndef CONFIG_SMOLLM_HPP +#define CONFIG_SMOLLM_HPP #include "Types.hpp" #include "models/transformer/configuration_transformer.hpp" @@ -109,4 +109,4 @@ class SmolLMConfig { } }; -#endif // CONFIG_SmolLM_HPP \ No newline at end of file +#endif // CONFIG_SMOLLM_HPP \ No newline at end of file diff --git a/src/models/smollm/modeling_smollm.hpp b/src/models/smollm/modeling_smollm.hpp index 7bd25030..2dd94d72 100644 --- a/src/models/smollm/modeling_smollm.hpp +++ b/src/models/smollm/modeling_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_SmolLM.hpp + * @file modeling_smollm.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * @version 0.1 * @date 2024-09-25 @@ -8,8 +8,8 @@ * */ #pragma once -#ifndef MODELING_SmolLM_HPP -#define MODELING_SmolLM_HPP +#ifndef MODELING_SMOLLM_HPP +#define MODELING_SMOLLM_HPP #include "Layer.hpp" #include "Module.hpp" @@ -112,4 +112,4 @@ class SmolLMModel final : public Module { } }; -#endif // MODELING_SmolLM_HPP \ No newline at end of file +#endif // MODELING_SMOLLM_HPP \ No newline at end of file diff --git a/src/models/smollm/tokenization_smollm.hpp b/src/models/smollm/tokenization_smollm.hpp index f81c8e8d..0840e844 100644 --- a/src/models/smollm/tokenization_smollm.hpp +++ b/src/models/smollm/tokenization_smollm.hpp @@ -1,5 +1,5 @@ /** - * @file tokenization_SmolLM.hpp + * @file tokenization_smollm.hpp * @author Chenghua Wang (chenghua.wang.edu@gmail.com) * 
@brief * @version 0.1 @@ -8,8 +8,8 @@ * @copyright Copyright (c) 2024 * */ -#ifndef DCLMTOKENIZATION_SmolLM_HPP -#define DCLMTOKENIZATION_SmolLM_HPP +#ifndef DCLMTOKENIZATION_SMOLLM_HPP +#define DCLMTOKENIZATION_SMOLLM_HPP #include "tokenizers/BPE/Bpe.hpp" #include "tokenizers/Tokenizer.hpp" @@ -323,4 +323,4 @@ class SmolLMTokenizer final { #undef CHR #undef ORD -#endif //! DCLMTOKENIZATION_SmolLM_HPP \ No newline at end of file +#endif // DCLMTOKENIZATION_SMOLLM_HPP \ No newline at end of file