-
Notifications
You must be signed in to change notification settings - Fork 871
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Llama.cpp example for cpp backend (#2904)
* Version1 of llm inference with cpp backend Signed-off-by: Shrinath Suresh <[email protected]> Updating llm handler - loadmodel, preprocess, inference methods Signed-off-by: Shrinath Suresh <[email protected]> Fixed infinite lock by adding request ids to the preprocess method Signed-off-by: Shrinath Suresh <[email protected]> Adding test script for finding tokens per second llama-7b-chat and ggml version Signed-off-by: Shrinath Suresh <[email protected]> GGUF Compatibility Signed-off-by: Shrinath Suresh <[email protected]> Fixing unit tests Signed-off-by: Shrinath Suresh <[email protected]> Fix typo Signed-off-by: Shrinath Suresh <[email protected]> Using folly to read config path Signed-off-by: Shrinath Suresh <[email protected]> Removing debug couts Signed-off-by: Shrinath Suresh <[email protected]> Processing all the items in the batch Signed-off-by: Shrinath Suresh <[email protected]> Adopted llama.cpp api changes * Adapt to removal of TS backend * Re-add test for llama.cpp example * Add llama.cpp as a submodule * Point to correct llama.cpp installation * Build llama.cpp in build.sh * Skip llama.cpp example test if model weights are not available * renamed torchscript_model folder into examples * Adjust to new base_handler interface * Remove debug statement * Rename llamacpp class + remove dummy.pt file * Move llamacpp config.json * Moved and created prompt file * Reset context for multiple batch entries * Add doc for llamacpp example * Fix spell check * Replace output example in llamacpp example * Move cpp example src into main examples folder * Convert cerr/cout into logs --------- Co-authored-by: Shrinath Suresh <[email protected]>
- Loading branch information
1 parent
3ecaf0b
commit a07b7d9
Showing
40 changed files
with
564 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
[submodule "third_party/google/rpc"] | ||
path = third_party/google/rpc | ||
url = https://github.com/googleapis/googleapis.git | ||
[submodule "cpp/third-party/llama.cpp"] | ||
path = cpp/third-party/llama.cpp | ||
url = https://github.com/ggerganov/llama.cpp.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,6 @@ | ||
# Example handlers are built from their own CMakeLists.txt files under
# examples/cpp/ instead of being defined inline here. Defining the targets
# inline as well would declare the same target (e.g. babyllama_handler) twice.
#
# add_subdirectory(<source_dir> <binary_dir>): the second argument is the
# binary directory for that subproject, here the matching folder under
# test/resources/examples/.
add_subdirectory("../../../examples/cpp/babyllama/" "../../../test/resources/examples/babyllama/babyllama_handler/")

add_subdirectory("../../../examples/cpp/llamacpp/" "../../../test/resources/examples/llamacpp/llamacpp_handler/")

add_subdirectory("../../../examples/cpp/mnist/" "../../../test/resources/examples/mnist/mnist_handler/")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,38 @@ | ||
#include <fstream> | ||
|
||
#include "test/utils/common.hh" | ||
|
||
// Loads the babyllama example handler and runs a single prediction using
// prompt.txt as input. The handler needs both the checkpoint
// (stories15M.bin) and the tokenizer (tokenizer.bin), so the test is skipped
// unless both files can be opened.
TEST_F(ModelPredictTest, TestLoadPredictBabyLlamaHandler) {
  std::string base_dir = "test/resources/examples/babyllama/";
  std::string file1 = base_dir + "babyllama_handler/stories15M.bin";
  std::string file2 = base_dir + "babyllama_handler/tokenizer.bin";

  std::ifstream f1(file1);
  std::ifstream f2(file2);

  // Skip if EITHER file is missing. Using && here would only skip when both
  // are absent and would let the test run (and fail) with one file missing.
  if (!f1.good() || !f2.good()) {
    GTEST_SKIP()
        << "Skipping TestLoadPredictBabyLlamaHandler because of missing files: "
        << file1 << " or " << file2;
  }

  this->LoadPredict(
      std::make_shared<torchserve::LoadModelRequest>(
          base_dir + "babyllama_handler", "llm", -1, "", "", 1, false),
      base_dir + "babyllama_handler", base_dir + "prompt.txt", "llm_ts", 200);
}
|
||
// Loads the llama.cpp example handler and runs a single prediction using
// prompt.txt as input. The GGUF model file is not guaranteed to be present,
// so the test is skipped when it cannot be opened.
TEST_F(ModelPredictTest, TestLoadPredictLlmHandler) {
  std::string base_dir = "test/resources/examples/llamacpp/";
  std::string file1 = base_dir + "llamacpp_handler/llama-2-7b-chat.Q5_0.gguf";
  std::ifstream f(file1);

  if (!f.good()) {
    GTEST_SKIP()
        << "Skipping TestLoadPredictLlmHandler because of missing file: "
        << file1;
  }

  // Only the llamacpp paths belong here; the old torchscript_model/babyllama
  // arguments were superseded and must not be passed alongside them.
  this->LoadPredict(
      std::make_shared<torchserve::LoadModelRequest>(
          base_dir + "llamacpp_handler", "llamacpp", -1, "", "", 1, false),
      base_dir + "llamacpp_handler", base_dir + "prompt.txt", "llm_ts", 200);
}
File renamed without changes.
4 changes: 4 additions & 0 deletions
4
cpp/test/resources/examples/babyllama/babyllama_handler/config.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"checkpoint_path" : "test/resources/examples/babyllama/babyllama_handler/stories15M.bin", | ||
"tokenizer_path" : "test/resources/examples/babyllama/babyllama_handler/tokenizer.bin" | ||
} |
File renamed without changes.
File renamed without changes.
10 changes: 10 additions & 0 deletions
10
cpp/test/resources/examples/llamacpp/llamacpp_handler/MAR-INF/MANIFEST.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"createdOn": "28/07/2020 06:32:08", | ||
"runtime": "LSP", | ||
"model": { | ||
"modelName": "llamacpp", | ||
"handler": "libllamacpp_handler:LlamaCppHandler", | ||
"modelVersion": "2.0" | ||
}, | ||
"archiverVersion": "0.2.0" | ||
} |
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 0 additions & 5 deletions
5
cpp/test/resources/torchscript_model/babyllama/babyllama_handler/config.json
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
|
||
# Build the babyllama example handler as a shared library from its single source file.
add_library(babyllama_handler SHARED src/baby_llama_handler.cc) | ||
|
||
# Link against ts_backends_core / ts_utils (presumably the TorchServe C++ backend
# targets defined elsewhere in the project; confirm) and the libraries in TORCH_LIBRARIES.
target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES}) | ||
# Extra warnings plus aggressive optimization for this target only.
# NOTE(review): -Ofast implies -ffast-math on GCC/Clang; confirm relaxed
# floating-point semantics are acceptable for this handler.
target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"checkpoint_path" : "/home/ubuntu/serve/examples/cpp/babyllama/stories15M.bin", | ||
"tokenizer_path" : "/home/ubuntu/serve/examples/cpp/babyllama/tokenizer.bin" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Location of the llama.cpp checkout (third-party/llama.cpp under the project source dir).
set(LLAMACPP_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/third-party/llama.cpp") | ||
|
||
# Build the llama.cpp example handler as a shared library from its single source file.
add_library(llamacpp_handler SHARED src/llamacpp_handler.cc) | ||
|
||
# Object files expected to already exist inside the llama.cpp directory.
# NOTE(review): presumably produced by building llama.cpp before this project is
# configured (the commit message mentions build.sh); confirm, since nothing in
# this file builds them.
set(MY_OBJECT_FILES | ||
${LLAMACPP_SRC_DIR}/ggml.o | ||
${LLAMACPP_SRC_DIR}/llama.o | ||
${LLAMACPP_SRC_DIR}/common.o | ||
${LLAMACPP_SRC_DIR}/ggml-quants.o | ||
${LLAMACPP_SRC_DIR}/ggml-alloc.o | ||
${LLAMACPP_SRC_DIR}/grammar-parser.o | ||
${LLAMACPP_SRC_DIR}/console.o | ||
${LLAMACPP_SRC_DIR}/build-info.o | ||
${LLAMACPP_SRC_DIR}/ggml-backend.o | ||
|
||
) | ||
|
||
# Add the object files to the handler library as additional sources.
target_sources(llamacpp_handler PRIVATE ${MY_OBJECT_FILES}) | ||
# Add the llama.cpp directory to the include path (PUBLIC, so it also applies to targets linking this one).
target_include_directories(llamacpp_handler PUBLIC ${LLAMACPP_SRC_DIR}) | ||
# Link against ts_backends_core / ts_utils (presumably the TorchServe C++ backend
# targets defined elsewhere in the project; confirm) and the libraries in TORCH_LIBRARIES.
target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES}) |
Oops, something went wrong.