Skip to content

Commit

Permalink
feat: add PhoneLM
Browse files Browse the repository at this point in the history
  • Loading branch information
yirongjie committed Oct 24, 2024
1 parent 722d27d commit 70b3174
Show file tree
Hide file tree
Showing 8 changed files with 49,307 additions and 4 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ mllm.egg-info/

examples/demo_deepseek.cpp
src/models/deepseek/*
examples/demo_phonellm.cpp
src/models/phonellm/*
# examples/demo_phonelm.cpp
# src/models/phonelm/*
examples/demo_minicpm3.cpp
src/models/minicpm3/*
examples/demo.cpp
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func_llm_add_executable(demo_minicpm)
func_llm_add_executable(demo_smollm)
func_llm_add_executable(demo_openelm)
func_llm_add_executable(demo_dclm)
# func_llm_add_executable(demo_phonellm)
func_llm_add_executable(demo_phonelm)

func_vlm_add_executable(demo_llava)
func_vlm_add_executable(demo_fuyu)
Expand Down
60 changes: 60 additions & 0 deletions examples/demo_phonelm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include <iostream>
#include <vector>
#include "Types.hpp"
#include "cmdline.h"
#include "models/phonelm/modeling_phonelm.hpp"
#include "models/smollm/tokenization_smollm.hpp"

using namespace mllm;

int main(int argc, char **argv) {
cmdline::parser cmdParser;
cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/phonelm_vocab.mllm");
cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/phonelm_merges.txt");
cmdParser.add<string>("model", 'o', "specify mllm model path", false, "../models/phonelm-1.5b-instruct-fp32.mllm");
cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
cmdParser.add<int>("thread", 't', "num of threads", false, 40);
cmdParser.parse_check(argc, argv);

string merge_path = cmdParser.get<string>("merge");
string vocab_path = cmdParser.get<string>("vocab");
string model_path = cmdParser.get<string>("model");
int tokens_limit = cmdParser.get<int>("limits");
CPUBackend::cpu_threads = cmdParser.get<int>("thread");

auto tokenizer = SmolLMTokenizer(vocab_path, merge_path);

string system_prompt_start;
string system_prompt_end;

PhoneLMConfig config(tokens_limit, "1.5B");
auto model = PhoneLMForCausalLM(config);
model.load(model_path);

vector<string> in_strs = {
"Hello, who are you?",
"What can you do?",
"Please introduce Beijing University of Posts and Telecommunications.",
};

for (int i = 0; i < in_strs.size(); ++i) {
auto input_str = tokenizer.apply_chat_template(in_strs[i]);
auto input_tensor = tokenizer.tokenize(input_str);
std::cout << "[Q] " << in_strs[i] << std::endl;
std::cout << "[A] " << std::flush;
LlmTextGeneratorOpts opt{
.max_new_tokens = 100,
.do_sample = false,
};
model.generate(input_tensor, opt, [&](unsigned int out_token) -> bool {
auto out_string = tokenizer.detokenize({out_token});
auto [not_end, output_string] = tokenizer.postprocess(out_string);
if (!not_end) { return false; }
std::cout << output_string << std::flush;
return true;
});
model.clear_kvcache();
std::cout << "\n";
}
return 0;
}
117 changes: 117 additions & 0 deletions src/models/phonelm/configuration_phonelm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/**
 * @file configuration_phonelm.hpp
 * @brief configuration file of the PhoneLM llm.
 * @version 0.1
 * @date 2024-10-24
 *
 * @copyright Copyright (c) 2024
 *
 */

#ifndef CONFIG_PHONELM_HPP
#define CONFIG_PHONELM_HPP
#include "Types.hpp"
#include "models/transformer/configuration_transformer.hpp"
#include <algorithm>
#include <cctype>
#include <iterator>
#include <stdexcept>
#include <string>

using namespace mllm;

class PhoneLMNameConfig : public TransformerNameConfig {
public:
    /**
     * @brief Fill in the weight-tensor name prefixes following the
     * Hugging Face PhoneLM checkpoint layout.
     */
    void init() {
        // Model-level tensors.
        token_embd_name = "model.embed_tokens";
        post_norm_name = "model.norm";
        lm_head_name = "lm_head";
        // Per-layer prefixes: "model.layers.<i>.self_attn." / ".mlp."
        blk_name = "model.layers.";
        _attn_base_name = "self_attn.";
        _ffn_base_name = "mlp.";
        // Attention projections.
        _q_proj_name = "q_proj";
        _k_proj_name = "k_proj";
        _v_proj_name = "v_proj";
        _o_proj_name = "o_proj";
        // Feed-forward projections.
        _gate_proj_name = "gate_proj";
        _up_proj_name = "up_proj";
        _down_proj_name = "down_proj";
        // Layer norms.
        _attn_norm_name = "input_layernorm";
        _ffn_norm_name = "post_attention_layernorm";
    }

    std::string blk_name;
    std::string token_embd_name;
    std::string post_norm_name;
    std::string lm_head_name;
    std::string _gate_proj_name;
};

// Dimension bundle for constructing a PhoneLMConfig with custom sizes
// (see the PhoneLMConfig(int, PhoneLMdimConfig) constructor below).
struct PhoneLMdimConfig {
    int hidden_size = 1024;        // embedding / residual-stream width
    int intermediate_size = 2816;  // MLP inner (up/gate projection) width
    int num_attention_heads = 16;  // query heads
    int num_key_value_heads = 16;  // KV heads (== query heads: no GQA by default)
    int num_hidden_layers = 24;    // transformer block count
    string activation = "ReLU";    // MLP activation name
};

struct PhoneLMConfig : public TransformerConfig {
    /**
     * @brief Build a config for one of the released PhoneLM sizes.
     * @param token_limit maximum KV cache length (tokens).
     * @param billions size tag, case-insensitive: "1.5B" or "0.5B".
     * @throws std::runtime_error for any other size string.
     */
    explicit PhoneLMConfig(int token_limit, string billions = "1.5B") :
        cache_limit(token_limit) {
        names_config.init();
        // Lower-case the tag so "1.5B" and "1.5b" are equivalent.
        // Cast through unsigned char: passing a negative char to tolower is UB.
        string billionsType;
        std::transform(billions.begin(), billions.end(), std::back_inserter(billionsType),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        if (billionsType == "1.5b") {
            hidden_size = 2560;
            intermediate_size = 6816;
            num_attention_heads = 16;
            num_key_value_heads = 16;
            num_hidden_layers = 19;
        } else if (billionsType == "0.5b") {
            hidden_size = 1024;
            intermediate_size = 4864;
            num_attention_heads = 16;
            num_key_value_heads = 16;
            num_hidden_layers = 24;
        } else {
            throw std::runtime_error("Unsupported PhoneLM model size");
        }
    }

    /**
     * @brief Build a config from explicit dimensions.
     * @param token_limit maximum KV cache length (tokens).
     * @param dim_config custom model dimensions and activation.
     */
    explicit PhoneLMConfig(int token_limit, PhoneLMdimConfig dim_config) :
        cache_limit(token_limit) {
        names_config.init();
        hidden_size = dim_config.hidden_size;
        intermediate_size = dim_config.intermediate_size;
        num_attention_heads = dim_config.num_attention_heads;
        num_key_value_heads = dim_config.num_key_value_heads;
        num_hidden_layers = dim_config.num_hidden_layers;
        hidden_act = dim_config.activation;
    }

    float attention_dropout = 0.0;
    // NOTE(review): 151643 is Qwen's special-token id while vocab_size is
    // 49152 — these look copied from a Qwen config; verify against the
    // PhoneLM tokenizer before relying on them.
    int bos_token_id = 151643;
    int eos_token_id = 151643;
    std::string hidden_act = "ReLU"; // MLP activation
    int hidden_size = 1024;
    float initializer_range = 0.02;
    int intermediate_size = 2816;
    int max_position_embeddings = 32768;
    int max_window_layers = 21;
    int num_attention_heads = 16;
    int num_hidden_layers = 24;
    int num_key_value_heads = 16;
    double rms_norm_eps = 1e-6;
    float rope_theta = 1000000.0;
    int vocab_size = 49152;
    bool tie_embedding_words = true; // lm_head shares weights with embed_tokens

    int cache_limit; // KV cache capacity, set from token_limit
    RoPEType RoPE_type = RoPEType::HFHUBROPE;
    PhoneLMNameConfig names_config;
};

#endif //! CONFIG_PHONELM_HPP
Loading

0 comments on commit 70b3174

Please sign in to comment.