Skip to content

Commit

Permalink
Merge branch 'master' into xsn/wasm_simd
Browse files Browse the repository at this point in the history
  • Loading branch information
ngxson committed Feb 12, 2025
2 parents 2ab608b + 31afcbe commit 19a4f16
Show file tree
Hide file tree
Showing 51 changed files with 2,214 additions and 921 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
| [HIP](docs/build.md#hip) | AMD GPU |
| [Vulkan](docs/build.md#vulkan) | GPU |
| [CANN](docs/build.md#cann) | Ascend NPU |
| [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |

## Building the project

Expand Down
2 changes: 1 addition & 1 deletion common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
));
add_opt(common_arg(
{"--no-context-shift"},
string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
[](common_params & params) {
params.ctx_shift = false;
}
Expand Down
28 changes: 21 additions & 7 deletions common/chat-template.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,16 +249,30 @@ class chat_template {
inputs.add_generation_prompt = false;
full = apply(inputs);
}

if (full.find(prefix) != 0) {
if (prefix.rfind(eos_token_) == prefix.size() - eos_token_.size()) {
prefix = prefix.substr(0, prefix.size() - eos_token_.size());
auto eos_pos_last = full.rfind(eos_token_);
if (eos_pos_last == prefix.size() - eos_token_.size() ||
(full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
full = full.substr(0, eos_pos_last);
}
size_t common_prefix_length = 0;
for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
if (prefix[i] != full[i]) {
break;
}
if (prefix[i] == '<') {
// DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
// but it removes thinking tags for past messages.
// The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
continue;
}
common_prefix_length = i + 1;
}
if (full.find(prefix) != 0) {
auto example = full.substr(common_prefix_length);
if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
} else {
tool_call_example_ = example;
}
tool_call_example_ = full.substr(prefix.size());
}
} catch (const std::exception & e) {
fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
Expand Down Expand Up @@ -363,7 +377,7 @@ class chat_template {
if (polyfill_tools) {
adjusted_messages = add_system(inputs.messages,
"You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
(!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_));
(!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
} else {
adjusted_messages = inputs.messages;
}
Expand Down
12 changes: 6 additions & 6 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,13 +424,13 @@ bool set_process_priority(enum ggml_sched_priority prio);
//

#ifdef __GNUC__
#ifdef __MINGW32__
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
# if defined(__MINGW32__) && !defined(__clang__)
# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
# else
# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
# endif
#else
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#endif
#else
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
# define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
#endif

LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
Expand Down
1 change: 1 addition & 0 deletions common/log.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "log.h"

#include <chrono>
#include <condition_variable>
#include <cstdarg>
#include <cstdio>
Expand Down
3 changes: 2 additions & 1 deletion common/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "ggml.h" // for ggml_log_level

#define LOG_CLR_TO_EOL "\033[K\r"
#define LOG_COL_DEFAULT "\033[0m"
#define LOG_COL_BOLD "\033[1m"
#define LOG_COL_RED "\033[31m"
Expand All @@ -14,7 +15,7 @@

#ifndef __GNUC__
# define LOG_ATTRIBUTE_FORMAT(...)
#elif defined(__MINGW32__)
#elif defined(__MINGW32__) && !defined(__clang__)
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#else
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
Expand Down
33 changes: 24 additions & 9 deletions common/minja.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,13 @@ static std::string strip(const std::string & s) {
return s.substr(start, end - start + 1);
}

static std::string capitalize(const std::string & s) {
if (s.empty()) return s;
auto result = s;
result[0] = std::toupper(result[0]);
return result;
}

static std::string html_escape(const std::string & s) {
std::string result;
result.reserve(s.size());
Expand Down Expand Up @@ -1462,6 +1469,9 @@ class MethodCallExpr : public Expression {
if (method->get_name() == "strip") {
vargs.expectArgs("strip method", {0, 0}, {0, 0});
return Value(strip(str));
} else if (method->get_name() == "capitalize") {
vargs.expectArgs("capitalize method", {0, 0}, {0, 0});
return Value(capitalize(str));
} else if (method->get_name() == "endswith") {
vargs.expectArgs("endswith method", {1, 1}, {0, 0});
auto suffix = vargs.args[0].get<std::string>();
Expand Down Expand Up @@ -1792,7 +1802,7 @@ class Parser {
auto left = parseStringConcat();
if (!left) throw std::runtime_error("Expected left side of 'logical compare' expression");

static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)");
static std::regex compare_tok(R"(==|!=|<=?|>=?|in\b|is\b|not\s+in\b)");
static std::regex not_tok(R"(not\b)");
std::string op_str;
while (!(op_str = consumeToken(compare_tok)).empty()) {
Expand Down Expand Up @@ -2171,7 +2181,7 @@ class Parser {
using TemplateTokenIterator = TemplateTokenVector::const_iterator;

std::vector<std::string> parseVarNames() {
static std::regex varnames_regex(R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)");
static std::regex varnames_regex(R"(((?:\w+)(?:\s*,\s*(?:\w+))*)\s*)");

std::vector<std::string> group;
if ((group = consumeTokenGroups(varnames_regex)).empty()) throw std::runtime_error("Expected variable names");
Expand All @@ -2194,13 +2204,13 @@ class Parser {
}

TemplateTokenVector tokenize() {
static std::regex comment_tok(R"(\{#([-~]?)([\s\S\r\n]*?)([-~]?)#\})");
static std::regex comment_tok(R"(\{#([-~]?)([\s\S]*?)([-~]?)#\})");
static std::regex expr_open_regex(R"(\{\{([-~])?)");
static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)");
static std::regex block_open_regex(R"(^\{%([-~])?\s*)");
static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)");
static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)");
static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})");
static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})");
static std::regex expr_close_regex(R"(\s*([-~])?\}\})");
static std::regex block_close_regex(R"(\s*([-~])?%\})");

TemplateTokenVector tokens;
std::vector<std::string> group;
Expand Down Expand Up @@ -2284,7 +2294,7 @@ class Parser {
auto post_space = parseBlockClose();
tokens.push_back(std::make_unique<EndGenerationTemplateToken>(location, pre_space, post_space));
} else if (keyword == "set") {
static std::regex namespaced_var_regex(R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))");
static std::regex namespaced_var_regex(R"((\w+)\s*\.\s*(\w+))");

std::string ns;
std::vector<std::string> var_names;
Expand Down Expand Up @@ -2336,6 +2346,11 @@ class Parser {
throw std::runtime_error("Unexpected block: " + keyword);
}
} else if (std::regex_search(it, end, match, non_text_open_regex)) {
if (!match.position()) {
if (match[0] != "{#")
throw std::runtime_error("Internal error: Expected a comment");
throw std::runtime_error("Missing end of comment tag");
}
auto text_end = it + match.position();
text = std::string(it, text_end);
it = text_end;
Expand Down Expand Up @@ -2400,7 +2415,7 @@ class Parser {

auto text = text_token->text;
if (post_space == SpaceHandling::Strip) {
static std::regex trailing_space_regex(R"((\s|\r|\n)+$)");
static std::regex trailing_space_regex(R"(\s+$)");
text = std::regex_replace(text, trailing_space_regex, "");
} else if (options.lstrip_blocks && it != end) {
auto i = text.size();
Expand All @@ -2410,7 +2425,7 @@ class Parser {
}
}
if (pre_space == SpaceHandling::Strip) {
static std::regex leading_space_regex(R"(^(\s|\r|\n)+)");
static std::regex leading_space_regex(R"(^\s+)");
text = std::regex_replace(text, leading_space_regex, "");
} else if (options.trim_blocks && (it - 1) != begin && !dynamic_cast<ExpressionTemplateToken*>((*(it - 2)).get())) {
if (text.length() > 0 && text[0] == '\n') {
Expand Down
2 changes: 1 addition & 1 deletion common/speculative.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ struct common_speculative_params {
int n_draft = 16; // max drafted tokens
int n_reuse = 256;

float p_min = 0.9f; // min probabiliy required to accept a token in the draft
float p_min = 0.9f; // min probability required to accept a token in the draft
};

struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);
Expand Down
Loading

0 comments on commit 19a4f16

Please sign in to comment.