Commit 2c40066: merge latest upstream

marty1885 committed Sep 17, 2024
2 parents 8a56935 + 0d2ec43
Showing 121 changed files with 9,303 additions and 6,722 deletions.
19 changes: 11 additions & 8 deletions .github/workflows/build.yml
@@ -23,6 +23,9 @@ env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
GGML_NLOOP: 3
GGML_N_THREADS: 1
+ LLAMA_LOG_COLORS: 1
+ LLAMA_LOG_PREFIX: 1
+ LLAMA_LOG_TIMESTAMPS: 1

jobs:
macOS-latest-cmake-arm64:
@@ -375,7 +378,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -401,7 +404,7 @@ jobs:
continue-on-error: true

steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4

- name: add oneAPI to apt
shell: bash
@@ -442,7 +445,7 @@ jobs:
continue-on-error: true

steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4

- name: add oneAPI to apt
shell: bash
@@ -546,7 +549,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -576,7 +579,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -610,7 +613,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -969,14 +972,14 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4

- name: Install
id: depends
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading AMD HIP SDK Installer"
- Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP SDK"
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
write-host "Completed AMD HIP SDK installation"
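Note on the env block added at the top of this workflow: the LLAMA_LOG_* variables drive the new logging facility this merge brings in (see the common/log.cpp additions and the PR 9418 removal notice in the Makefile below). A minimal sketch of enabling the same output locally; the binary name and model path are illustrative assumptions, not part of this diff:

    # Sketch: variable names are from this diff; model path and binary are placeholders.
    export LLAMA_LOG_COLORS=1      # colorize log lines
    export LLAMA_LOG_PREFIX=1      # prefix each line with its log level
    export LLAMA_LOG_TIMESTAMPS=1  # prepend timestamps
    ./llama-cli -m models/model.gguf -p "Hello"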
7 changes: 7 additions & 0 deletions .github/workflows/server.yml
@@ -20,6 +20,12 @@ on:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

+ env:
+ LLAMA_LOG_COLORS: 1
+ LLAMA_LOG_PREFIX: 1
+ LLAMA_LOG_TIMESTAMPS: 1
+ LLAMA_LOG_VERBOSITY: 10

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
@@ -173,6 +179,7 @@ jobs:
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
+ $env:PYTHONIOENCODING = ":replace"
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
- name: Slow tests
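The PYTHONIOENCODING line added above keeps Python's default stream encoding but switches the error handler to "replace", so the behave run cannot crash on characters the Windows console cannot encode. A rough POSIX-shell equivalent for running the same suite locally; the shell translation is an assumption, the flags are copied from the step above:

    cd examples/server/tests
    PYTHONIOENCODING=":replace" behave --summary --stop --no-capture \
        --exclude 'issues|wrong_usages|passkey' --tags llama.cpp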
14 changes: 10 additions & 4 deletions CMakeLists.txt
@@ -82,11 +82,11 @@ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})

# change the default for these ggml options
if (NOT DEFINED GGML_LLAMAFILE)
- set(GGML_LLAMAFILE ON)
+ set(GGML_LLAMAFILE_DEFAULT ON)
endif()

- if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
- set(GGML_CUDA_USE_GRAPHS ON)
+ if (NOT DEFINED GGML_CUDA_GRAPHS)
+ set(GGML_CUDA_GRAPHS_DEFAULT ON)
endif()

# transition helpers
@@ -139,10 +139,16 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location o
# determining _precisely_ which defines are necessary for the llama-config
# package.
#
+ set(GGML_TRANSIENT_DEFINES)
get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
+ if (GGML_DIR_DEFINES)
+ list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
+ endif()
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
- set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
+ if (GGML_TARGET_DEFINES)
+ list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
+ endif()
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)

set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
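The CMake change above is about option propagation: the block now seeds GGML_LLAMAFILE_DEFAULT and GGML_CUDA_GRAPHS_DEFAULT instead of force-setting the options themselves (note the rename GGML_CUDA_USE_GRAPHS to GGML_CUDA_GRAPHS), and GGML_TRANSIENT_DEFINES is built with guarded appends so an unset property (which CMake reports as a value ending in -NOTFOUND) is no longer spliced into the define list. A sketch of overriding the new defaults at configure time; option names are taken from this diff, the build directory is arbitrary:

    cmake -B build -DGGML_LLAMAFILE=OFF -DGGML_CUDA_GRAPHS=OFF
    cmake --build build --config Release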
57 changes: 44 additions & 13 deletions Makefile
@@ -54,6 +54,7 @@ TEST_TARGETS = \
tests/test-grammar-parser \
tests/test-json-schema-to-grammar \
tests/test-llama-grammar \
+ tests/test-log \
tests/test-model-load-cancel \
tests/test-opt \
tests/test-quantize-fns \
@@ -148,6 +149,14 @@ GGML_NO_METAL := 1
DEPRECATE_WARNING := 1
endif

+ ifdef LLAMA_DISABLE_LOGS
+ REMOVE_WARNING := 1
+ endif
+
+ ifdef LLAMA_SERVER_VERBOSE
+ REMOVE_WARNING := 1
+ endif

ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
@@ -351,19 +360,11 @@ ifdef LLAMA_SANITIZE_UNDEFINED
MK_LDFLAGS += -fsanitize=undefined -g
endif

- ifdef LLAMA_SERVER_VERBOSE
- MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
- endif

ifdef LLAMA_SERVER_SSL
MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
MK_LDFLAGS += -lssl -lcrypto
endif

- ifdef LLAMA_DISABLE_LOGS
- MK_CPPFLAGS += -DLOG_DISABLE_LOGS
- endif # LLAMA_DISABLE_LOGS

# warnings
WARN_FLAGS = \
-Wall \
@@ -434,7 +435,7 @@ endif
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue

- ifndef RISCV
+ ifndef RISCV_CROSS_COMPILE

ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
# Use all CPU extensions that are available:
@@ -514,7 +515,12 @@ ifneq ($(filter loongarch64%,$(UNAME_M)),)
MK_CXXFLAGS += -mlasx
endif

- else
+ ifneq ($(filter riscv64%,$(UNAME_M)),)
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
+ endif
+
+ else # RISC-V CROSS COMPILATION
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
endif
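With the hunk above, a native build on riscv64 hardware picks up the vector-extension flags automatically, while the old RISCV guard is renamed RISCV_CROSS_COMPILE and now only gates cross builds. A sketch of both invocations; the cross-toolchain names are assumptions, not taken from this diff:

    make -j    # native riscv64 host: rv64gcv flags applied automatically
    make RISCV_CROSS_COMPILE=1 CC=riscv64-linux-gnu-gcc CXX=riscv64-linux-gnu-g++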
@@ -613,7 +619,7 @@ ifdef GGML_CUDA
CUDA_PATH ?= /usr/local/cuda
endif

- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
+ MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_NVCCFLAGS += -use_fast_math
endif # GGML_MUSA
@@ -925,6 +931,8 @@ OBJ_LLAMA = \

OBJ_COMMON = \
common/common.o \
+ common/arg.o \
+ common/log.o \
common/console.o \
common/ngram-cache.o \
common/sampling.o \
@@ -1021,6 +1029,14 @@ $(info - LLAMA_NO_CCACHE)
$(info )
endif

+ ifdef REMOVE_WARNING
+ $(info !!! REMOVAL WARNING !!!)
+ $(info The following LLAMA_ options have been removed and are no longer supported)
+ $(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418))
+ $(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418))
+ $(info )
+ endif
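The block above turns the two removed options into a loud notice: REMOVE_WARNING is set whenever either flag is defined (see the ifdef blocks earlier in this Makefile), so stale invocations print the $(info ...) text instead of silently doing nothing. For example, a sketch using a flag name from this diff:

    make LLAMA_DISABLE_LOGS=1 llama-cli   # builds, but first prints the removal notice pointing at PR 9418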

#
# Build libraries
#
@@ -1157,6 +1173,16 @@ common/common.o: \
include/llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@

+ common/arg.o: \
+ common/arg.cpp \
+ common/arg.h
+ $(CXX) $(CXXFLAGS) -c $< -o $@
+
+ common/log.o: \
+ common/log.cpp \
+ common/log.h
+ $(CXX) $(CXXFLAGS) -c $< -o $@

common/sampling.o: \
common/sampling.cpp \
common/sampling.h \
@@ -1335,7 +1361,7 @@ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
- $(OBJ_GGML) $(OBJ_LLAMA)
+ $(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

@@ -1429,6 +1455,7 @@ llama-server: \
examples/server/system-prompts.js.hpp \
examples/server/prompt-formats.js.hpp \
examples/server/json-schema-to-grammar.mjs.hpp \
+ examples/server/loading.html.hpp \
common/json.hpp \
common/stb_image.h \
$(OBJ_ALL)
@@ -1448,7 +1475,6 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
- ./llama-gen-docs
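Note the removed ./llama-gen-docs line: linking the target no longer runs the binary, so generating the docs is now an explicit step. A sketch, assuming a make-based build:

    make llama-gen-docs
    ./llama-gen-docs    # previously ran automatically at the end of the recipe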

libllava.a: examples/llava/llava.cpp \
examples/llava/llava.h \
@@ -1517,6 +1543,11 @@ tests/test-llama-grammar: tests/test-llama-grammar.cpp \
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

+ tests/test-log: tests/test-log.cpp \
+ $(OBJ_ALL)
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
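The new tests/test-log target above follows the same compile-then-link pattern as the neighboring tests; a sketch of building and running it locally:

    make tests/test-log && ./tests/test-log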

tests/test-grammar-parser: tests/test-grammar-parser.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
6 changes: 5 additions & 1 deletion README.md
@@ -17,7 +17,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)

## Hot topics

- - *add hot topics here*
+ - Huggingface GGUF editor: [discussion](https://github.com/ggerganov/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor)

----

@@ -77,6 +77,7 @@ Typically finetunes of the base models below are supported as well.
- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
- [x] [OLMo](https://allenai.org/olmo)
+ - [x] [OLMoE](https://huggingface.co/allenai/OLMoE-1B-7B-0924)
- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330)
- [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520)
@@ -89,6 +90,7 @@ Typically finetunes of the base models below are supported as well.
- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
- [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
- [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a)
+ - [x] [Jais](https://huggingface.co/inceptionai/jais-13b-chat)

(instructions for supporting more models: [HOWTO-add-model.md](./docs/development/HOWTO-add-model.md))

@@ -163,6 +165,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
- [AI Sublime Text plugin](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (MIT)
- [AIKit](https://github.com/sozercan/aikit) (MIT)
- [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
+ - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)

*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

@@ -171,6 +174,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
- [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML
- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
- [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage
+ - [Styled Lines](https://marketplace.unity.com/packages/tools/generative-ai/styled-lines-llama-cpp-model-292902) (proprietary licensed, async wrapper of inference part for game development in Unity3d with prebuild Mobile and Web platform wrappers and a model example)

**Infrastructure:**

3 changes: 3 additions & 0 deletions ci/run.sh
@@ -737,6 +737,9 @@ function gg_sum_embd_bge_small {

## main

+ export LLAMA_LOG_PREFIX=1
+ export LLAMA_LOG_TIMESTAMPS=1

if [ -z ${GG_BUILD_LOW_PERF} ]; then
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
rm -rf ${SRC}/models-mnt
20 changes: 12 additions & 8 deletions common/CMakeLists.txt
@@ -51,19 +51,23 @@ endif()
set(TARGET common)

add_library(${TARGET} STATIC
+ arg.cpp
+ arg.h
base64.hpp
- common.h
common.cpp
- sampling.h
- sampling.cpp
- console.h
+ common.h
console.cpp
- json.hpp
+ console.h
json-schema-to-grammar.cpp
- train.h
- train.cpp
- ngram-cache.h
+ json.hpp
+ log.cpp
+ log.h
ngram-cache.cpp
+ ngram-cache.h
+ sampling.cpp
+ sampling.h
+ train.cpp
+ train.h
)

if (BUILD_SHARED_LIBS)
(The remaining changed files were not loaded in this view.)
