Showing 8 changed files with 49,307 additions and 4 deletions.
@@ -0,0 +1,60 @@
#include <iostream>
#include <vector>
#include "Types.hpp"
#include "cmdline.h"
#include "models/phonelm/modeling_phonelm.hpp"
#include "models/smollm/tokenization_smollm.hpp"

using namespace mllm;

int main(int argc, char **argv) {
    cmdline::parser cmdParser;
    cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/phonelm_vocab.mllm");
    cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/phonelm_merges.txt");
    cmdParser.add<string>("model", 'o', "specify mllm model path", false, "../models/phonelm-1.5b-instruct-fp32.mllm");
    cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
    cmdParser.add<int>("thread", 't', "num of threads", false, 40);
    cmdParser.parse_check(argc, argv);

    string merge_path = cmdParser.get<string>("merge");
    string vocab_path = cmdParser.get<string>("vocab");
    string model_path = cmdParser.get<string>("model");
    int tokens_limit = cmdParser.get<int>("limits");
    CPUBackend::cpu_threads = cmdParser.get<int>("thread");

    auto tokenizer = SmolLMTokenizer(vocab_path, merge_path);

    string system_prompt_start;
    string system_prompt_end;

    PhoneLMConfig config(tokens_limit, "1.5B");
    auto model = PhoneLMForCausalLM(config);
    model.load(model_path);

    vector<string> in_strs = {
        "Hello, who are you?",
        "What can you do?",
        "Please introduce Beijing University of Posts and Telecommunications.",
    };

    for (int i = 0; i < in_strs.size(); ++i) {
        auto input_str = tokenizer.apply_chat_template(in_strs[i]);
        auto input_tensor = tokenizer.tokenize(input_str);
        std::cout << "[Q] " << in_strs[i] << std::endl;
        std::cout << "[A] " << std::flush;
        LlmTextGeneratorOpts opt{
            .max_new_tokens = 100,
            .do_sample = false,
        };
        model.generate(input_tensor, opt, [&](unsigned int out_token) -> bool {
            auto out_string = tokenizer.detokenize({out_token});
            auto [not_end, output_string] = tokenizer.postprocess(out_string);
            if (!not_end) { return false; }
            std::cout << output_string << std::flush;
            return true;
        });
        model.clear_kvcache();
        std::cout << "\n";
    }
    return 0;
}
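The demo above streams each decoded token to stdout through the generation callback; returning false from the callback ends generation for the current prompt. As a sketch of a variation (using only the calls shown above — model.generate, tokenizer.detokenize, tokenizer.postprocess — and not presented as an API reference), the same loop body could buffer the full answer into a string before printing:

// Sketch: collect streamed tokens into a string instead of printing them
// one by one. Assumes the same `model`, `tokenizer`, `input_tensor`, and
// `opt` objects as in the demo above; the surrounding loop is unchanged.
std::string answer;
model.generate(input_tensor, opt, [&](unsigned int out_token) -> bool {
    auto out_string = tokenizer.detokenize({out_token});
    auto [not_end, output_string] = tokenizer.postprocess(out_string);
    if (!not_end) { return false; } // stop token reached, end generation
    answer += output_string;        // buffer instead of streaming to stdout
    return true;                    // keep generating
});
std::cout << answer << std::endl;
model.clear_kvcache(); // reset the KV cache before the next prompt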
@@ -0,0 +1,117 @@
/**
 * @file configuration_phonelm.hpp
 * @author Chenghua Wang ([email protected])
 * @brief Configuration file of the PhoneLM LLM.
 * @version 0.1
 * @date 2024-04-03
 *
 * @copyright Copyright (c) 2024
 *
 */

#ifndef CONFIG_PHONELM_HPP
#define CONFIG_PHONELM_HPP
#include "Types.hpp"
#include "models/transformer/configuration_transformer.hpp"
#include <algorithm>
#include <cctype>
#include <iterator>

using namespace mllm;

class PhoneLMNameConfig : public TransformerNameConfig {
public:
    /**
     * @brief PhoneLM following the Hugging Face naming method
     */
    void init() {
        blk_name = "model.layers.";
        _attn_base_name = "self_attn.";
        _ffn_base_name = "mlp.";
        _q_proj_name = "q_proj";
        _k_proj_name = "k_proj";
        _v_proj_name = "v_proj";
        _o_proj_name = "o_proj";
        _gate_proj_name = "gate_proj";
        _up_proj_name = "up_proj";
        _down_proj_name = "down_proj";
        _attn_norm_name = "input_layernorm";
        _ffn_norm_name = "post_attention_layernorm";
        token_embd_name = "model.embed_tokens";
        post_norm_name = "model.norm";
        lm_head_name = "lm_head";
    }

    std::string blk_name;
    std::string token_embd_name;
    std::string post_norm_name;
    std::string lm_head_name;
    std::string _gate_proj_name;
};

struct PhoneLMdimConfig {
    int hidden_size = 1024;
    int intermediate_size = 2816;
    int num_attention_heads = 16;
    int num_key_value_heads = 16;
    int num_hidden_layers = 24;
    string activation = "ReLU";
};

struct PhoneLMConfig : public TransformerConfig {
    explicit PhoneLMConfig(int token_limit, string billions = "1.5B") :
        cache_limit(token_limit) {
        names_config.init();
        string billionsType;
        std::transform(billions.begin(), billions.end(), std::back_inserter(billionsType),
                       ::tolower);
        if (billionsType == "1.5b") {
            hidden_size = 2560;
            intermediate_size = 6816;
            num_attention_heads = 16;
            num_key_value_heads = 16;
            num_hidden_layers = 19;
        } else if (billionsType == "0.5b") {
            hidden_size = 1024;
            intermediate_size = 4864;
            num_attention_heads = 16;
            num_key_value_heads = 16;
            num_hidden_layers = 24;
        } else {
            throw std::runtime_error("Unsupported PhoneLM model size");
        }
    }
    explicit PhoneLMConfig(int token_limit, PhoneLMdimConfig dim_config) :
        cache_limit(token_limit) {
        names_config.init();
        hidden_size = dim_config.hidden_size;
        intermediate_size = dim_config.intermediate_size;
        num_attention_heads = dim_config.num_attention_heads;
        num_key_value_heads = dim_config.num_key_value_heads;
        num_hidden_layers = dim_config.num_hidden_layers;
        hidden_act = dim_config.activation;
    }

    float attention_dropout = 0.0;
    int bos_token_id = 151643;
    int eos_token_id = 151643;
    std::string hidden_act = "ReLU";
    int hidden_size = 1024;
    float initializer_range = 0.02;
    int intermediate_size = 2816;
    int max_position_embeddings = 32768;
    int max_window_layers = 21;
    int num_attention_heads = 16;
    int num_hidden_layers = 24;
    int num_key_value_heads = 16;
    double rms_norm_eps = 1e-6;
    float rope_theta = 1000000.0;
    int vocab_size = 49152;
    bool tie_embedding_words = true;

    int cache_limit;
    RoPEType RoPE_type = RoPEType::HFHUBROPE;
    PhoneLMNameConfig names_config;
};

#endif //! CONFIG_PHONELM_HPP
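Besides the two preset sizes ("1.5B" and "0.5B"), the header also exposes a constructor that takes a PhoneLMdimConfig directly. A minimal sketch of wiring a custom dimension set into the demo above (assuming the same PhoneLMForCausalLM loading path; the field values and the model file path here are illustrative placeholders, not a released checkpoint):

// Sketch: build a PhoneLMConfig from explicit dimensions instead of a
// preset size string. The values below are illustrative placeholders.
PhoneLMdimConfig dims;
dims.hidden_size = 1024;
dims.intermediate_size = 4864;
dims.num_attention_heads = 16;
dims.num_key_value_heads = 16;
dims.num_hidden_layers = 24;
dims.activation = "ReLU";

PhoneLMConfig custom_config(/*token_limit=*/400, dims);
auto custom_model = PhoneLMForCausalLM(custom_config);
custom_model.load("../models/phonelm-custom-fp32.mllm"); // hypothetical model file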