diff --git a/README.md b/README.md
index 13ca6c1d..7c784e19 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,7 @@ Wait.. why on-device multimodal LLM? - It's a key building block for [intelligen
 | [OPT 1.3B](https://github.com/facebookresearch/metaseq/tree/main/projects/OPT) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/opt-1.3b-mllm/tree/main) | |
 | [Phi-3-mini 3.8B](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/phi-3-mini-instruct-mllm/tree/main) | |
 | [MiniCPM 2B](https://huggingface.co/openbmb/MiniCPM-2B-dpo-fp32) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/minicpm-2b-dpo-mllm/tree/main) | |
+| [SmolLM 1.7B](https://huggingface.co/HuggingFaceTB/SmolLM-1.7B-Instruct) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | [✔️](https://huggingface.co/mllmTeam/smollm-1.7b-instruct-mllm/tree/main) | |
 
 ## Quick Start
 
diff --git a/examples/benchmark.cpp b/examples/benchmark.cpp
index 46835728..64939c2f 100644
--- a/examples/benchmark.cpp
+++ b/examples/benchmark.cpp
@@ -142,8 +142,8 @@ int main(int argc, char **argv) {
         }
         model.profiling();
     } else if (model_name == "smollm-360M") {
-        SmoLlmConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152);
-        auto model = SmoLlmModel(config);
+        SmolLMConfig config(tokens_limit, "360M", RoPEType::HFHUBROPE, 49152);
+        auto model = SmolLMModel(config);
         model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4);
 
         auto input_tensor = tokens2Input(input_size);
@@ -153,8 +153,8 @@
         }
         model.profiling();
     } else if (model_name == "smollm-1.7B") {
-        SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
-        auto model = SmoLlmModel(config);
+        SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
+        auto model = SmolLMModel(config);
         model.setNoLoadWeightsDtype(MLLM_TYPE_Q4_0_4_4);
 
         auto input_tensor = tokens2Input(input_size);
diff --git a/examples/demo_smollm.cpp b/examples/demo_smollm.cpp
index 1ad41dbf..a2ca94f3 100644
--- a/examples/demo_smollm.cpp
+++ b/examples/demo_smollm.cpp
@@ -21,7 +21,7 @@ int main(int argc, char **argv) {
     cmdline::parser cmdParser;
     cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/smollm_vocab.mllm");
     cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/smollm_merges.txt");
-    cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/SmoLlm-1.7B-q4_0x4.mllm");
+    cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/smollm-1.7b-instruct-q4_0_4_4.mllm");
     cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
     cmdParser.add<int>("thread", 't', "num of threads", false, 4);
     cmdParser.parse_check(argc, argv);
@@ -32,9 +32,9 @@
     int tokens_limit = cmdParser.get<int>("limits");
     CPUBackend::cpu_threads = cmdParser.get<int>("thread");
 
-    auto tokenizer = SmoLlmTokenizer(vocab_path, merge_path);
-    SmoLlmConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
-    auto model = SmoLlmModel(config);
+    auto tokenizer = SmolLMTokenizer(vocab_path, merge_path);
+    SmolLMConfig config(tokens_limit, "1.7B", RoPEType::HFHUBROPE, 49152);
+    auto model = SmolLMModel(config);
     model.load(model_path);
 
     vector<string> in_strs = {
diff --git a/src/backends/xnnpack/third_party/XNNPACK b/src/backends/xnnpack/third_party/XNNPACK
index 331e1074..93032eaa 160000
--- a/src/backends/xnnpack/third_party/XNNPACK
+++ b/src/backends/xnnpack/third_party/XNNPACK
@@ -1 +1 @@
-Subproject commit 331e10744ffd05bbd51d310c99274e646692c079
+Subproject commit 93032eaa5f7df99d3fb5cbcf5acb862e3e09c270
diff --git a/src/models/smollm/configuration_smollm.hpp b/src/models/smollm/configuration_smollm.hpp
index a0e460af..d9e2b490 100644
--- a/src/models/smollm/configuration_smollm.hpp
+++ b/src/models/smollm/configuration_smollm.hpp
@@ -15,7 +15,7 @@
 
 using namespace mllm;
 
-class SmoLlMNameConfig : public TransformerNameConfig {
+class SmolLMNameConfig : public TransformerNameConfig {
 public:
     std::string blk_name;
     std::string token_embd_name;
@@ -68,7 +68,7 @@ class SmoLlMNameConfig : public TransformerNameConfig {
     }
 };
 
-class SmoLlmConfig {
+class SmolLMConfig {
 public:
     int vocab_size{};
     int hidden_dim{};
@@ -78,11 +78,11 @@ class SmoLlmConfig {
     int block_num{};
     RoPEType RoPE_type;
     int cache_limit{};
-    SmoLlMNameConfig names_config;
+    SmolLMNameConfig names_config;
     float rope_theta;
     int max_position_embeddings;
 
-    explicit SmoLlmConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) {
+    explicit SmolLMConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) {
         names_config.init(type);
         vocab_size = vocab;
         if (billions == "1.7B" || billions == "1.7b") {
diff --git a/src/models/smollm/modeling_smollm.hpp b/src/models/smollm/modeling_smollm.hpp
index e1686bb5..2dd94d72 100644
--- a/src/models/smollm/modeling_smollm.hpp
+++ b/src/models/smollm/modeling_smollm.hpp
@@ -18,15 +18,15 @@
 
 using namespace mllm;
 
-class SmoLlmMLP final : public Module {
+class SmolLMMLP final : public Module {
     Layer gate_proj;
     Layer silu;
    Layer up_proj;
     Layer down_proj;
 
 public:
-    SmoLlmMLP() = default;
-    SmoLlmMLP(int hidden_dim, int ffn_hidden, const SmoLlMNameConfig &names, const string &base_name) {
+    SmolLMMLP() = default;
+    SmolLMMLP(int hidden_dim, int ffn_hidden, const SmolLMNameConfig &names, const string &base_name) {
         gate_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._gate_proj_name);
         silu = SiLU(base_name + "act");
         up_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name);
@@ -42,18 +42,18 @@ class SmoLlmMLP final : public Module {
     }
 };
 
-class SmoLlmBlock final : public Module {
+class SmolLMBlock final : public Module {
     MultiHeadAttention attention;
-    SmoLlmMLP mlp;
+    SmolLMMLP mlp;
     Layer norm1;
     Layer norm2;
 
 public:
-    SmoLlmBlock() = default;
-    SmoLlmBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmoLlMNameConfig &names, const string &base_name) {
+    SmolLMBlock() = default;
+    SmolLMBlock(int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit, const SmolLMNameConfig &names, const string &base_name) {
         attention = MultiHeadAttention(hidden_dim, head_size, kv_head_size, hidden_dim / head_size, SPLIT_NONE, false, false,
                                        RoPE_type, rope_theta, max_position_embeddings, cache_limit, true, false, names, base_name + names._attn_base_name);
-        mlp = SmoLlmMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
+        mlp = SmolLMMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
         norm1 = RMSNorm(hidden_dim, 1e-6, base_name + names._attn_norm_name);
         norm2 = RMSNorm(hidden_dim, 1e-6, base_name + names._ffn_norm_name);
     }
@@ -72,22 +72,22 @@ class SmoLlmBlock final : public Module {
     }
 };
 
-class SmoLlmModel final : public Module {
+class SmolLMModel final : public Module {
     Layer embedding;
-    vector<SmoLlmBlock> blocks;
+    vector<SmolLMBlock> blocks;
     Layer norm;
     Parameter lm_head;
 
 public:
-    explicit SmoLlmModel(const SmoLlmConfig &config) :
-        SmoLlmModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num,
+    explicit SmolLMModel(const SmolLMConfig &config) :
+        SmolLMModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num,
                     config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit, config.names_config, config.names_config.blk_name) {
     }
-    SmoLlmModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit,
-                const SmoLlMNameConfig &names, const string &base_name) {
+    SmolLMModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit,
+                const SmolLMNameConfig &names, const string &base_name) {
         embedding = Embedding(vocab_size, hidden_dim, names.token_embd_name);
-        blocks = List<SmoLlmBlock>(block_num, hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name);
+        blocks = List<SmolLMBlock>(block_num, hidden_dim, head_size, kv_head_size, ffn_hidden, RoPE_type, rope_theta, max_position_embeddings, cache_limit, names, base_name);
         norm = RMSNorm(hidden_dim, 1e-6, names.post_norm_name);
         lm_head = Parameter(1, vocab_size, 1, hidden_dim, names.token_embd_name + ".weight");
diff --git a/src/models/smollm/tokenization_smollm.hpp b/src/models/smollm/tokenization_smollm.hpp
index 086581ba..0840e844 100644
--- a/src/models/smollm/tokenization_smollm.hpp
+++ b/src/models/smollm/tokenization_smollm.hpp
@@ -52,9 +52,9 @@ static const std::vector<std::string> FIXED_PAT_STRS = {
     "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
 };
 
-class SmoLlmTokenizer final {
+class SmolLMTokenizer final {
 public:
-    explicit SmoLlmTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) :
+    explicit SmolLMTokenizer(const std::string &vocab_file, const std::string &merge_file, bool split_special_tokens = false) :
         split_special_tokens_(split_special_tokens) {
         Module::initBackend(MLLM_CPU);
         tokenizer = new BPETokenizer(vocab_file);
@@ -100,7 +100,7 @@ class SmoLlmTokenizer final {
         tokenizer->setMergeRank(bpe_ranks_);
     }
 
-    ~SmoLlmTokenizer() {
+    ~SmolLMTokenizer() {
         delete tokenizer;
     }
@@ -323,4 +323,4 @@ class SmoLlmTokenizer final {
 
 #undef CHR
 #undef ORD
-#endif //! DCLMTOKENIZATION_SMOLLM_HPP
\ No newline at end of file
+#endif // TOKENIZATION_SMOLLM_HPP
\ No newline at end of file
diff --git a/third_party/pybind11 b/third_party/pybind11
index ad9fd39e..af67e873 160000
--- a/third_party/pybind11
+++ b/third_party/pybind11
@@ -1 +1 @@
-Subproject commit ad9fd39e143c8296a49a1b5b258cb6aa24e23889
+Subproject commit af67e87393b0f867ccffc2702885eea12de063fc