llama.cpp: update llama.cpp submodule
Signed-off-by: Daniel Bevenius <[email protected]>
danbev committed May 23, 2024
1 parent 5786289 commit d0971c6
Showing 3 changed files with 6 additions and 3 deletions.
3 changes: 3 additions & 0 deletions fundamentals/llama.cpp/Makefile
@@ -36,6 +36,9 @@ OBJS = llama.cpp/common.o \
        llama.cpp/ggml-backend.o \
        llama.cpp/unicode.o \
        llama.cpp/unicode-data.o \
+       llama.cpp/sgemm.o \
+       llama.cpp/sampling.o \
+       llama.cpp/grammar-parser.o \
        llama.cpp/json-schema-to-grammar.o
 
 simple-prompt: src/simple-prompt.cpp
2 changes: 1 addition & 1 deletion fundamentals/llama.cpp/llama.cpp
Submodule llama.cpp updated 271 files
4 changes: 2 additions & 2 deletions fundamentals/llama.cpp/src/simple-prompt.cpp
@@ -182,14 +182,14 @@ int main(int argc, char** argv) {
     // The 8 here is a "guess". If the token is longer than 8 bytes then we
     // will resize the piece vector and call llama_token_to_piece again.
     std::vector<char> piece(8, 0);
-    int n_tokens = llama_token_to_piece(model, new_token_id, piece.data(), piece.size());
+    int n_tokens = llama_token_to_piece(model, new_token_id, piece.data(), piece.size(), false);
     // llama_token_to_piece will return the negative length of the token if
     // it is longer than the passed-in buffer size. If that is the case
     // then we need to resize the piece vector to the length of the token
     // and call llama_token_to_piece again.
     if (n_tokens < 0) {
         piece.resize(-n_tokens);
-        int new_len = llama_token_to_piece(model, new_token_id, piece.data(), piece.size());
+        int new_len = llama_token_to_piece(model, new_token_id, piece.data(), piece.size(), false);
     } else {
         piece.resize(n_tokens);
     }
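The guess-then-resize pattern the hunk's comments describe can be sketched as a self-contained function. `fake_token_to_piece` below is a hypothetical stand-in for `llama_token_to_piece`: like the real API, it returns the negative required length when the buffer is too small, and the number of bytes written otherwise (the real function additionally takes the model, the token id, and, after this commit, a `special` flag):

```cpp
#include <cstring>
#include <string>
#include <vector>

// Hypothetical stand-in for llama_token_to_piece: returns the negative
// required length if `length` is too small, otherwise the bytes written.
static int fake_token_to_piece(const std::string& piece_text, char* buf, int length) {
    int needed = (int) piece_text.size();
    if (needed > length) {
        return -needed;  // buffer too small: report the size required
    }
    std::memcpy(buf, piece_text.data(), needed);
    return needed;       // bytes written
}

// Convert a token's text using the guess-then-resize pattern from the diff:
// start with a small buffer, and grow it once if the first call reports
// a negative (too-small) result.
std::string token_to_string(const std::string& piece_text) {
    std::vector<char> piece(8, 0);  // 8 bytes is an initial guess
    int n = fake_token_to_piece(piece_text, piece.data(), (int) piece.size());
    if (n < 0) {
        piece.resize(-n);           // grow to the exact required size
        n = fake_token_to_piece(piece_text, piece.data(), (int) piece.size());
    }
    return std::string(piece.data(), n);
}
```

Note that this sketch assigns the second call's return value back to `n`, whereas the diffed code stores it in a separate `new_len` local and leaves `n_tokens` negative.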
