Showing 8 changed files with 49,307 additions and 4 deletions.
@@ -0,0 +1,60 @@
#include <iostream>
#include <vector>
#include "Types.hpp"
#include "cmdline.h"
#include "models/phonelm/modeling_phonelm.hpp"
#include "models/smollm/tokenization_smollm.hpp"

using namespace mllm;

int main(int argc, char **argv) {
    cmdline::parser cmdParser;
    cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/phonelm_vocab.mllm");
    cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/phonelm_merges.txt");
    cmdParser.add<string>("model", 'o', "specify mllm model path", false, "../models/phonelm-1.5b-instruct-fp32.mllm");
    cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
    cmdParser.add<int>("thread", 't', "num of threads", false, 40);
    cmdParser.parse_check(argc, argv);

    string merge_path = cmdParser.get<string>("merge");
    string vocab_path = cmdParser.get<string>("vocab");
    string model_path = cmdParser.get<string>("model");
    int tokens_limit = cmdParser.get<int>("limits");
    CPUBackend::cpu_threads = cmdParser.get<int>("thread");

    auto tokenizer = SmolLMTokenizer(vocab_path, merge_path);

    string system_prompt_start;
    string system_prompt_end;

    PhoneLMConfig config(tokens_limit, "1.5B");
    auto model = PhoneLMForCausalLM(config);
    model.load(model_path);

    vector<string> in_strs = {
        "Hello, who are you?",
        "What can you do?",
        "Please introduce Beijing University of Posts and Telecommunications.",
    };

    for (int i = 0; i < in_strs.size(); ++i) {
        auto input_str = tokenizer.apply_chat_template(in_strs[i]);
        auto input_tensor = tokenizer.tokenize(input_str);
        std::cout << "[Q] " << in_strs[i] << std::endl;
        std::cout << "[A] " << std::flush;
        LlmTextGeneratorOpts opt{
            .max_new_tokens = 100,
            .do_sample = false,
        };
        model.generate(input_tensor, opt, [&](unsigned int out_token) -> bool {
            auto out_string = tokenizer.detokenize({out_token});
            auto [not_end, output_string] = tokenizer.postprocess(out_string);
            if (!not_end) { return false; }
            std::cout << output_string << std::flush;
            return true;
        });
        model.clear_kvcache();
        std::cout << "\n";
    }
    return 0;
}
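The demo above streams each decoded token to stdout through the generation callback; returning false from the callback ends generation for the current prompt. As a sketch of a variation (using only the calls shown above — model.generate, tokenizer.detokenize, tokenizer.postprocess — and not presented as an API reference), the same loop body could buffer the full answer into a string before printing:

// Sketch: collect streamed tokens into a string instead of printing them
// one by one. Assumes the same `model`, `tokenizer`, `input_tensor`, and
// `opt` objects as in the demo above; the surrounding loop is unchanged.
std::string answer;
model.generate(input_tensor, opt, [&](unsigned int out_token) -> bool {
    auto out_string = tokenizer.detokenize({out_token});
    auto [not_end, output_string] = tokenizer.postprocess(out_string);
    if (!not_end) { return false; } // stop token reached, end generation
    answer += output_string;        // buffer instead of streaming to stdout
    return true;                    // keep generating
});
std::cout << answer << std::endl;
model.clear_kvcache(); // reset the KV cache before the next prompt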
@@ -0,0 +1,117 @@
/**
 * @file configuration_phonelm.hpp
 * @author Chenghua Wang ([email protected])
 * @brief Configuration file of the PhoneLM LLM.
 * @version 0.1
 * @date 2024-04-03
 *
 * @copyright Copyright (c) 2024
 *
 */

#ifndef CONFIG_PHONELM_HPP
#define CONFIG_PHONELM_HPP
#include "Types.hpp"
#include "models/transformer/configuration_transformer.hpp"
#include <algorithm>
#include <cctype>
#include <iterator>

using namespace mllm;

class PhoneLMNameConfig : public TransformerNameConfig {
public:
    /**
     * @brief PhoneLM following the Hugging Face naming method
     */
    void init() {
        blk_name = "model.layers.";
        _attn_base_name = "self_attn.";
        _ffn_base_name = "mlp.";
        _q_proj_name = "q_proj";
        _k_proj_name = "k_proj";
        _v_proj_name = "v_proj";
        _o_proj_name = "o_proj";
        _gate_proj_name = "gate_proj";
        _up_proj_name = "up_proj";
        _down_proj_name = "down_proj";
        _attn_norm_name = "input_layernorm";
        _ffn_norm_name = "post_attention_layernorm";
        token_embd_name = "model.embed_tokens";
        post_norm_name = "model.norm";
        lm_head_name = "lm_head";
    }

    std::string blk_name;
    std::string token_embd_name;
    std::string post_norm_name;
    std::string lm_head_name;
    std::string _gate_proj_name;
};

struct PhoneLMdimConfig {
    int hidden_size = 1024;
    int intermediate_size = 2816;
    int num_attention_heads = 16;
    int num_key_value_heads = 16;
    int num_hidden_layers = 24;
    string activation = "ReLU";
};

struct PhoneLMConfig : public TransformerConfig {
    explicit PhoneLMConfig(int token_limit, string billions = "1.5B") :
        cache_limit(token_limit) {
        names_config.init();
        string billionsType;
        std::transform(billions.begin(), billions.end(), std::back_inserter(billionsType),
                       ::tolower);
        if (billionsType == "1.5b") {
            hidden_size = 2560;
            intermediate_size = 6816;
            num_attention_heads = 16;
            num_key_value_heads = 16;
            num_hidden_layers = 19;
        } else if (billionsType == "0.5b") {
            hidden_size = 1024;
            intermediate_size = 4864;
            num_attention_heads = 16;
            num_key_value_heads = 16;
            num_hidden_layers = 24;
        } else {
            throw std::runtime_error("Unsupported PhoneLM model size");
        }
    }
    explicit PhoneLMConfig(int token_limit, PhoneLMdimConfig dim_config) :
        cache_limit(token_limit) {
        names_config.init();
        hidden_size = dim_config.hidden_size;
        intermediate_size = dim_config.intermediate_size;
        num_attention_heads = dim_config.num_attention_heads;
        num_key_value_heads = dim_config.num_key_value_heads;
        num_hidden_layers = dim_config.num_hidden_layers;
        hidden_act = dim_config.activation;
    }

    float attention_dropout = 0.0;
    int bos_token_id = 151643;
    int eos_token_id = 151643;
    std::string hidden_act = "ReLU";
    int hidden_size = 1024;
    float initializer_range = 0.02;
    int intermediate_size = 2816;
    int max_position_embeddings = 32768;
    int max_window_layers = 21;
    int num_attention_heads = 16;
    int num_hidden_layers = 24;
    int num_key_value_heads = 16;
    double rms_norm_eps = 1e-6;
    float rope_theta = 1000000.0;
    int vocab_size = 49152;
    bool tie_embedding_words = true;

    int cache_limit;
    RoPEType RoPE_type = RoPEType::HFHUBROPE;
    PhoneLMNameConfig names_config;
};

#endif //! CONFIG_PHONELM_HPP
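Besides the two preset sizes ("1.5B" and "0.5B"), the header also exposes a constructor that takes a PhoneLMdimConfig directly. A minimal sketch of wiring a custom dimension set into the demo above (assuming the same PhoneLMForCausalLM loading path; the field values and the model file path here are illustrative placeholders, not a released checkpoint):

// Sketch: build a PhoneLMConfig from explicit dimensions instead of a
// preset size string. The values below are illustrative placeholders.
PhoneLMdimConfig dims;
dims.hidden_size = 1024;
dims.intermediate_size = 4864;
dims.num_attention_heads = 16;
dims.num_key_value_heads = 16;
dims.num_hidden_layers = 24;
dims.activation = "ReLU";

PhoneLMConfig custom_config(/*token_limit=*/400, dims);
auto custom_model = PhoneLMForCausalLM(custom_config);
custom_model.load("../models/phonelm-custom-fp32.mllm"); // hypothetical model file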