Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Commit

Permalink
fix(llmb): minor changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
philpax committed Dec 12, 2023
1 parent e46a433 commit 86b068c
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 22 deletions.
14 changes: 4 additions & 10 deletions crates/llm-base/src/tokenizer/embedded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,10 @@ impl EmbeddedTokenizer {
match self.model {
GgufEmbeddedTokenizerModel::Llama => {
let text = escape_whitespace(format!(" {}", text).as_bytes());

let _token_ids: Vec<_> = TokenizerSpm::new(self)
.tokenize(&text)
.into_iter()
.map(|id| {
// TODO: see if this can be made more efficient
output.push((self.id_to_token[id as usize].text.clone(), id));
(self.id_to_token[id as usize].text.clone(), id)
})
.collect();
for id in TokenizerSpm::new(self).tokenize(&text) {
// TODO: see if this can be made more efficient
output.push((self.id_to_token[id as usize].text.clone(), id));
}
Ok(output)
}
_ => unimplemented!(),
Expand Down
12 changes: 1 addition & 11 deletions crates/llm-base/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,6 @@ impl Display for HuggingFaceTokenizerErrorSource {
}
}

/// At the time of writing, the embedded tokenizer is not enabled as it has
/// some bugs. We're just not enabling the option while it's broken.
const EMBEDDED_TOKENIZER_ENABLED: bool = true;

#[derive(Clone, Debug, PartialEq)]
/// The source of a tokenizer.
pub enum TokenizerSource {
Expand Down Expand Up @@ -140,13 +136,7 @@ impl TokenizerSource {
if let Ok(hf) = gguf.metadata.get_str("tokenizer.huggingface.json") {
Ok(Self::load_huggingface_json(hf)?)
} else if EmbeddedTokenizer::is_present_in_metadata(&gguf.metadata) {
if EMBEDDED_TOKENIZER_ENABLED {
Ok(EmbeddedTokenizer::from_metadata(&gguf.metadata)?.into())
} else {
Err(TokenizerLoadError::NoSupportedTokenizersFound {
unsupported_tokenizers: vec!["embedded".to_owned()],
})
}
Ok(EmbeddedTokenizer::from_metadata(&gguf.metadata)?.into())
} else {
Err(TokenizerLoadError::NoSupportedTokenizersFound {
unsupported_tokenizers: vec![],
Expand Down
2 changes: 1 addition & 1 deletion crates/llm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ macro_rules! define_models {

impl ModelArchitecture {
/// All available model architectures
pub const ALL: &[Self] = &[
pub const ALL: &'static [Self] = &[
$(
#[cfg(feature = $model_lowercase_str)]
Self::$model_pascalcase,
Expand Down

0 comments on commit 86b068c

Please sign in to comment.