From f8142275058615977e1826edc861ecc7c58adafa Mon Sep 17 00:00:00 2001 From: madschemas <155993105+MadSchemas@users.noreply.github.com> Date: Tue, 4 Feb 2025 20:26:29 +0300 Subject: [PATCH] Update to version v4.20.0 --- .github/workflows/install_grpc.sh | 2 +- .github/workflows/test.yml | 16 +- bindings/builtin/builtin.go | 2 +- bindings/consts.go | 2 +- changelog.md | 27 +- cjson/decoder.go | 3 +- ...-clang-tidy-18.py => run-clang-tidy-19.py} | 4 +- cpp_src/CMakeLists.txt | 9 +- cpp_src/client/itemimplbase.h | 3 +- cpp_src/client/queryresults.h | 10 +- cpp_src/cmd/reindexer_tool/CMakeLists.txt | 2 +- cpp_src/core/cbinding/reindexer_c.cc | 27 +- cpp_src/core/cjson/cjsondecoder.cc | 10 +- cpp_src/core/cjson/cjsondecoder.h | 7 +- cpp_src/core/cjson/cjsontools.cc | 5 + cpp_src/core/cjson/cjsontools.h | 26 + cpp_src/core/cjson/jsondecoder.cc | 7 +- cpp_src/core/cjson/msgpackdecoder.cc | 52 +- cpp_src/core/cjson/msgpackdecoder.h | 1 - cpp_src/core/cjson/protobufdecoder.cc | 7 +- cpp_src/core/cjson/protobufschemabuilder.h | 1 + cpp_src/core/cjson/tagspath.h | 22 +- cpp_src/core/clusterproxy.cc | 2 +- cpp_src/core/defnsconfigs.h | 6 +- cpp_src/core/formatters/key_string_fmt.h | 4 +- cpp_src/core/formatters/namespacesname_fmt.h | 8 + cpp_src/core/ft/ft_fast/dataholder.h | 7 +- cpp_src/core/ft/ft_fast/dataprocessor.cc | 44 +- cpp_src/core/ft/ft_fast/dataprocessor.h | 2 +- cpp_src/core/ft/ft_fast/frisochartypes.cc | 12 +- cpp_src/core/ft/ft_fast/selecter.cc | 2 +- cpp_src/core/ft/ftsetcashe.h | 4 +- cpp_src/core/ft/numtotext.cc | 195 +- cpp_src/core/ft/numtotext.h | 2 +- cpp_src/core/ft/stopwords/stop_en.cc | 25 +- cpp_src/core/ft/stopwords/stop_ru.cc | 378 +-- cpp_src/core/idsetcache.h | 5 +- cpp_src/core/index/index.cc | 11 +- cpp_src/core/index/index.h | 13 +- cpp_src/core/index/indexfastupdate.cc | 66 + cpp_src/core/index/indexfastupdate.h | 18 + cpp_src/core/index/indexordered.cc | 20 +- cpp_src/core/index/indexstore.cc | 17 +- 
cpp_src/core/index/indextext/fastindextext.cc | 46 +- cpp_src/core/index/indextext/fastindextext.h | 4 +- cpp_src/core/index/indextext/fieldsgetter.h | 8 +- cpp_src/core/index/indextext/indextext.cc | 32 +- cpp_src/core/index/indextext/indextext.h | 15 +- cpp_src/core/index/indexunordered.cc | 62 +- cpp_src/core/index/indexunordered.h | 23 +- cpp_src/core/index/rtree/indexrtree.cc | 6 +- cpp_src/core/index/string_map.h | 16 +- cpp_src/core/indexopts.cc | 29 +- cpp_src/core/indexopts.h | 9 +- cpp_src/core/item.cc | 10 +- cpp_src/core/itemimpl.cc | 27 +- cpp_src/core/itemimpl.h | 20 +- cpp_src/core/itemimplrawdata.h | 7 +- cpp_src/core/joincache.h | 8 +- cpp_src/core/key_value_type.h | 11 + cpp_src/core/keyvalue/key_string.cc | 10 + cpp_src/core/keyvalue/key_string.h | 187 +- cpp_src/core/keyvalue/p_string.h | 24 +- cpp_src/core/keyvalue/uuid.cc | 6 + cpp_src/core/keyvalue/uuid.h | 1 + cpp_src/core/keyvalue/variant.cc | 56 +- cpp_src/core/keyvalue/variant.h | 5 +- cpp_src/core/lrucache.cc | 63 +- cpp_src/core/lrucache.h | 156 +- cpp_src/core/namespace/namespace.cc | 4 + cpp_src/core/namespace/namespaceimpl.cc | 115 +- cpp_src/core/namespace/namespaceimpl.h | 10 +- cpp_src/core/namespace/namespacestat.cc | 41 +- cpp_src/core/namespace/namespacestat.h | 29 +- cpp_src/core/namespace/snapshot/snapshot.cc | 2 +- cpp_src/core/namespace/stringsholder.h | 15 +- cpp_src/core/namespacedef.h | 2 +- cpp_src/core/nsselecter/btreeindexiterator.h | 28 +- .../core/nsselecter/btreeindexiteratorimpl.h | 7 +- .../comparator/comparator_indexed.h | 2 +- .../equalposition_comparator_impl.h | 4 +- cpp_src/core/nsselecter/joinedselector.cc | 20 +- cpp_src/core/nsselecter/nsselecter.cc | 71 +- cpp_src/core/nsselecter/querypreprocessor.cc | 23 +- cpp_src/core/nsselecter/querypreprocessor.h | 2 +- .../nsselecter/selectiteratorcontainer.cc | 64 +- cpp_src/core/parallelexecutor.h | 1 + cpp_src/core/payload/fieldsset.h | 2 + cpp_src/core/payload/payloadfieldtype.cc | 9 + 
cpp_src/core/payload/payloadfieldtype.h | 7 +- cpp_src/core/payload/payloadiface.cc | 45 +- cpp_src/core/payload/payloadiface.h | 1 + cpp_src/core/payload/payloadtype.cc | 5 +- cpp_src/core/payload/payloadvalue.cc | 8 - cpp_src/core/payload/payloadvalue.h | 23 +- cpp_src/core/query/dsl/dslparser.cc | 52 +- cpp_src/core/query/dsl/dslparser.h | 3 +- cpp_src/core/query/query.cc | 78 +- cpp_src/core/query/query.h | 30 +- cpp_src/core/query/sql/sqlparser.cc | 80 +- cpp_src/core/query/sql/sqlparser.h | 2 +- cpp_src/core/querycache.h | 9 +- cpp_src/core/queryresults/itemref.h | 15 +- .../core/queryresults/localqueryresults.cc | 29 +- cpp_src/core/queryresults/localqueryresults.h | 11 +- cpp_src/core/queryresults/queryresults.cc | 176 +- cpp_src/core/queryresults/queryresults.h | 62 +- cpp_src/core/reindexer_impl/reindexerimpl.cc | 43 +- .../core/selectfunc/functions/debugrank.cc | 12 +- .../core/selectfunc/functions/highlight.cc | 8 +- cpp_src/core/selectfunc/functions/snippet.cc | 17 +- cpp_src/core/selectfunc/functions/snippet.h | 7 +- cpp_src/core/selectkeyresult.h | 8 +- cpp_src/core/sorting/sortexpression.cc | 31 +- cpp_src/debug/terminate_handler.cpp | 3 + cpp_src/estl/elist.h | 2 +- cpp_src/estl/h_vector.h | 39 +- cpp_src/estl/intrusive_ptr.h | 66 +- cpp_src/estl/suffix_map.h | 16 +- cpp_src/estl/tokenizer.h | 9 +- cpp_src/gtests/tests/API/api.cc | 1 + cpp_src/gtests/tests/API/base_tests.cc | 38 +- cpp_src/gtests/tests/CMakeLists.txt | 10 +- cpp_src/gtests/tests/fixtures/ft_api.cc | 16 +- cpp_src/gtests/tests/fixtures/ft_api.h | 6 +- .../fixtures/fuzzing/random_generator.cc | 2 +- .../gtests/tests/fixtures/grpcclient_api.h | 2 +- cpp_src/gtests/tests/fixtures/queries_api.cc | 5 +- cpp_src/gtests/tests/fixtures/queries_api.h | 4 +- .../gtests/tests/fixtures/queries_verifier.h | 3 +- .../gtests/tests/fixtures/reindexertestapi.cc | 15 + .../gtests/tests/fixtures/reindexertestapi.h | 2 + .../gtests/tests/fixtures/systemhelpers.cc | 1 + 
.../tests/unit/cascade_replication_test.cc | 50 + .../gtests/tests/unit/clusterproxy_test.cc | 1 + .../tests/unit/composite_indexes_test.cc | 118 +- cpp_src/gtests/tests/unit/dsl_parser_test.cc | 3 +- .../gtests/tests/unit/equalposition_tests.cc | 38 + cpp_src/gtests/tests/unit/ft/ft_generic.cc | 40 +- .../tests/unit/ft/ft_incremental_build.cc | 4 +- cpp_src/gtests/tests/unit/namespace_test.cc | 17 +- cpp_src/gtests/tests/unit/queries_test.cc | 16 +- cpp_src/gtests/tests/unit/replication_test.cc | 702 ------ cpp_src/gtests/tests/unit/rpcclient_test.cc | 14 +- .../gtests/tests/unit/sharding_system_test.cc | 2 + .../gtests/tests/unit/string_function_test.cc | 64 +- .../tests/unit/synccororeindexer_test.cc | 1 + cpp_src/gtests/tests/unit/tolal_lru_cache.cc | 4 +- cpp_src/net/cproto/cproto.h | 2 +- cpp_src/net/cproto/dispatcher.h | 29 +- cpp_src/net/cproto/serverconnection.cc | 12 + cpp_src/net/listener.cc | 2 +- cpp_src/server/contrib/server.md | 23 +- cpp_src/server/contrib/server.yml | 2188 +++++++++-------- cpp_src/server/grpc/CMakeLists.txt | 2 +- cpp_src/server/grpc/reindexerservice.cc | 6 +- cpp_src/server/httpserver.cc | 32 +- cpp_src/server/rpcserver.cc | 2 +- .../vendor/prometheus/impl/check_names.cc | 12 +- cpp_src/tools/errors.cc | 9 + cpp_src/tools/errors.h | 30 +- cpp_src/tools/fsops.cc | 3 +- cpp_src/tools/lsn.h | 9 +- cpp_src/tools/stringstools.cc | 43 +- cpp_src/tools/varint.h | 2 +- cpp_src/wal/walrecord.h | 15 +- cpp_src/wal/walselecter.cc | 21 +- cpp_src/wal/waltracker.cc | 11 +- describer.go | 35 +- fulltext.md | 2 +- reflect.go | 14 +- reindexer.go | 1 + test/dsl_test.go | 4 +- test/encdec_test.go | 164 +- test/index_struct_test.go | 6 +- test/storage_test.go | 95 + 176 files changed, 3694 insertions(+), 3534 deletions(-) rename clang-tidy/{run-clang-tidy-18.py => run-clang-tidy-19.py} (99%) create mode 100644 cpp_src/core/index/indexfastupdate.cc create mode 100644 cpp_src/core/index/indexfastupdate.h create mode 100644 
cpp_src/core/keyvalue/key_string.cc delete mode 100644 cpp_src/gtests/tests/unit/replication_test.cc diff --git a/.github/workflows/install_grpc.sh b/.github/workflows/install_grpc.sh index f8b01e57a..36b31bcae 100755 --- a/.github/workflows/install_grpc.sh +++ b/.github/workflows/install_grpc.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -git clone --branch v1.44.0 https://github.com/grpc/grpc +git clone --branch v1.54.3 https://github.com/grpc/grpc cd grpc git submodule update --init mkdir -p cmake/build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3ae5a654d..e7d0cdbc7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,15 +47,10 @@ jobs: build: strategy: matrix: - os: [ubuntu-22.04, macos-13] + os: [ubuntu-22.04, ubuntu-24.04, macos-13] include: - - os: ubuntu-24.04 - cc: gcc-12 - cxx: g++-12 - os: ubuntu-latest sanitizer: ASAN - cc: gcc-10 - cxx: g++-10 - os: ubuntu-latest sanitizer: TSAN cc: gcc-12 @@ -66,6 +61,7 @@ jobs: OS: ${{matrix.os}} CC: ${{matrix.cc}} CXX: ${{matrix.cxx}} + SANITIZER: ${{matrix.sanitizer}} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -85,7 +81,11 @@ jobs: export CPLUS_INCLUDE_PATH=$GITHUB_WORKSPACE/grpc/third_party/abseil-cpp mkdir build && cd build if [[ $OS == ubuntu-latest ]]; then - cmake -DENABLE_GRPC=ON -DWITH_${{matrix.sanitizer}}=On .. + if [[ $SANITIZER == 'ASAN' ]]; then + cmake -DENABLE_GRPC=ON -DWITH_${{matrix.sanitizer}}=On -DGTEST_SKIP_SHARDING=On .. + else + cmake -DENABLE_GRPC=ON -DWITH_${{matrix.sanitizer}}=On .. + fi elif [[ $OS == macos* ]]; then cmake -DGH_CI_OSX=ON .. 
else @@ -119,8 +119,6 @@ jobs: include: - os: ubuntu-latest sanitizer: ASAN - cc: gcc-10 - cxx: g++-10 test: 'C++' - os: ubuntu-latest sanitizer: ASAN diff --git a/bindings/builtin/builtin.go b/bindings/builtin/builtin.go index 303dc94f7..f73bc5fe3 100644 --- a/bindings/builtin/builtin.go +++ b/bindings/builtin/builtin.go @@ -255,7 +255,7 @@ func (binding *Builtin) Init(u []url.URL, eh bindings.EventsHandler, options ... caps: C.int64_t(caps.Value), } - return err2go(C.reindexer_connect_v4(binding.rx, str2c(u[0].Path), opts, str2c(bindings.ReindexerVersion), ccaps)) + return err2go(C.reindexer_connect_v4(binding.rx, str2c(u[0].Host+u[0].Path), opts, str2c(bindings.ReindexerVersion), ccaps)) } func (binding *Builtin) StartWatchOnCtx(ctx context.Context) (CCtxWrapper, error) { diff --git a/bindings/consts.go b/bindings/consts.go index 5ad688c39..3135f9b84 100644 --- a/bindings/consts.go +++ b/bindings/consts.go @@ -2,7 +2,7 @@ package bindings const CInt32Max = int(^uint32(0) >> 1) -const ReindexerVersion = "v4.19.0" +const ReindexerVersion = "v4.20.0" // public go consts from type_consts.h and reindexer_ctypes.h const ( diff --git a/changelog.md b/changelog.md index 973647fc9..b43f304e3 100644 --- a/changelog.md +++ b/changelog.md @@ -1,7 +1,32 @@ +# Version 4.20.0 (04.02.2025) +## Core +- [fea] Optimized indexed strings memory layout (each unique indexed string now requires 20-36 bytes less memery, depending on platform) +- [fea] Optimized non-built btree-index selection +- [fea] Optimized selections with large documents count in results +- [fea] Reduced allocations count in inserts/upserts and select-queries +- [fea] Changed index compatibility check in `AddIndex`: now `dense` and `regular` indexes are treated compatible +- [fix] Fixed parsing for [EQUAL_POSITION](readme.md#search-in-array-fields-with-matching-array-indexes) with brackets in SQL + +## Replication +- [fea] Improved full sync performance for namespaces with large amount of empty documents + +## 
Reindexer server +- [fix] Added more TCP-requests format checks +- [fix] Fixed [server.yml](cpp_src/server/contrib/server.yml) format errors + +## Go connector +- [fea] Added support for absolute path on Windows platforms in `builtin`-binding + +## Build +- [fix] Fixed build for ARM docker image + +## Ported +- [fea/fix] Ported all the fixes and features from [v3.31.0](https://github.com/Restream/reindexer/releases/tag/v3.31.0) + # Version 4.19.0 (17.12.2024) ## Core -- [fea] Added automatic masking for reindexer user's credentials in log files and cluster/sharding JSON's (except for `async_replication` configs in `#config`-namespace) +- [fea] Added automatic masking for reindexer user's credentials in log files and cluster/sharding JSON's (except for `async_replication`-config in `#config`-namespace) - [fix] Fixed assertion on attempt to use 'null'-values with `=`, `IN()`, `<`, `>`, `<=`, `>=` and `RANGE()` operators ## Replication diff --git a/cjson/decoder.go b/cjson/decoder.go index 1f5371ef4..16245c9bc 100644 --- a/cjson/decoder.go +++ b/cjson/decoder.go @@ -36,8 +36,7 @@ func fieldByTag(t reflect.Type, tag string) (result reflect.StructField, ok bool } for i := 0; i < t.NumField(); i++ { result = t.Field(i) - if ftag := result.Tag.Get("json"); len(ftag) > 0 { - ftag, _ = splitStr(ftag, ',') + if ftag, _ := splitStr(result.Tag.Get("json"), ','); len(ftag) > 0 { if tag == ftag { return result, true } diff --git a/clang-tidy/run-clang-tidy-18.py b/clang-tidy/run-clang-tidy-19.py similarity index 99% rename from clang-tidy/run-clang-tidy-18.py rename to clang-tidy/run-clang-tidy-19.py index bb6352344..508d95765 100755 --- a/clang-tidy/run-clang-tidy-18.py +++ b/clang-tidy/run-clang-tidy-19.py @@ -251,10 +251,10 @@ def main(): action='store_true', help='allow alpha checkers from ' 'clang-analyzer.') parser.add_argument('-clang-tidy-binary', metavar='PATH', - default='clang-tidy-18', + default='clang-tidy-19', help='path to clang-tidy binary') 
parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', - default='clang-apply-replacements-18', + default='clang-apply-replacements-19', help='path to clang-apply-replacements binary') parser.add_argument('-checks', default=None, help='checks filter, when not specified, use clang-tidy ' diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index 177bac80d..f7e501382 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -50,7 +50,7 @@ else() option(LINK_RESOURCES "Link web resources as binary data" ON) endif() -set (REINDEXER_VERSION_DEFAULT "4.19.0") +set (REINDEXER_VERSION_DEFAULT "4.20.0") if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "RelWithDebInfo") @@ -72,6 +72,13 @@ if(MSVC) set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -Zi") set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG -Zi") set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG -Zi") +elseif(WITH_ASAN) + # Using O2 instead of O3 to build a bit faster. + # Also this allows to avoid SEGFAULT in libasan.so during coroutines interaction on CentOS7 (gcc-12). 
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g1") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g1") + set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") + set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG") else() set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g1") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g1") diff --git a/cpp_src/client/itemimplbase.h b/cpp_src/client/itemimplbase.h index 561d24873..ac3d6acfc 100644 --- a/cpp_src/client/itemimplbase.h +++ b/cpp_src/client/itemimplbase.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include "core/keyvalue/variant.h" #include "core/payload/payloadiface.h" @@ -76,7 +75,7 @@ class ItemImplBase { std::vector precepts_; bool unsafe_ = false; - std::deque holder_; + h_vector holder_; std::vector> largeJSONStrings_; }; diff --git a/cpp_src/client/queryresults.h b/cpp_src/client/queryresults.h index 8924b0690..0aeb14ece 100644 --- a/cpp_src/client/queryresults.h +++ b/cpp_src/client/queryresults.h @@ -37,9 +37,8 @@ class QueryResults { class Iterator : public CoroQueryResults::Iterator { public: - Iterator(const QueryResults* r, const CoroQueryResults* qr, int idx, int pos, int nextPos, - ResultSerializer::ItemParams itemParams) noexcept - : CoroQueryResults::Iterator{qr, idx, pos, nextPos, itemParams, {}}, r_(r) {} + Iterator(const QueryResults& r, int idx, int pos, int nextPos, ResultSerializer::ItemParams itemParams) noexcept + : CoroQueryResults::Iterator{&r.results_, idx, pos, nextPos, itemParams, {}}, r_(&r) {} Iterator& operator*() { return *this; } Iterator& operator++() noexcept { try { @@ -57,11 +56,12 @@ class QueryResults { } return *this; } + const QueryResults* r_; }; - Iterator begin() const noexcept { return Iterator{this, &results_, 0, 0, 0, {}}; } - Iterator end() const noexcept { return Iterator{this, &results_, int(results_.Count()), 0, 0, {}}; } + Iterator begin() const noexcept { return Iterator{*this, 0, 0, 0, {}}; } + Iterator end() const noexcept { return Iterator{*this, int(results_.Count()), 0, 0, {}}; } size_t Count() 
const noexcept { return results_.Count(); } int TotalCount() const noexcept { return results_.TotalCount(); } diff --git a/cpp_src/cmd/reindexer_tool/CMakeLists.txt b/cpp_src/cmd/reindexer_tool/CMakeLists.txt index 4b39f2425..f13bcbbc1 100644 --- a/cpp_src/cmd/reindexer_tool/CMakeLists.txt +++ b/cpp_src/cmd/reindexer_tool/CMakeLists.txt @@ -22,7 +22,7 @@ if (NOT MSVC AND NOT WITH_STDLIB_DEBUG) ExternalProject_Add( replxx_lib GIT_REPOSITORY "https://github.com/Restream/replxx" - GIT_TAG "b50b7b7a8c2835b45607cffabc18e4742072e9e6" + GIT_TAG "98aa91965d7495e030f31c6f05969177fe5ab81d" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR} ) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) diff --git a/cpp_src/core/cbinding/reindexer_c.cc b/cpp_src/core/cbinding/reindexer_c.cc index 78f99e957..b0170e936 100644 --- a/cpp_src/core/cbinding/reindexer_c.cc +++ b/cpp_src/core/cbinding/reindexer_c.cc @@ -1,8 +1,7 @@ #include "reindexer_c.h" -#include #include -#include +#include #include #include @@ -18,7 +17,7 @@ using namespace reindexer; constexpr int kQueryResultsPoolSize = 1024; -constexpr int kMaxConcurentQueries = 65534; +constexpr int kMaxConcurrentQueries = 65534; constexpr size_t kCtxArrSize = 1024; constexpr size_t kWarnLargeResultsLimit = 0x40000000; constexpr size_t kMaxPooledResultsCap = 0x10000; @@ -86,7 +85,7 @@ struct TransactionWrapper { }; static std::atomic serializedResultsCount{0}; -static sync_pool res_pool; +static sync_pool res_pool; static CGOCtxPool ctx_pool(kCtxArrSize); struct put_results_to_pool { @@ -154,7 +153,7 @@ static void results2c(std::unique_ptr result, struct reinde } out->results_ptr = uintptr_t(result.release()); - if (const auto count{serializedResultsCount.fetch_add(1, std::memory_order_relaxed)}; count > kMaxConcurentQueries) { + if (const auto count{serializedResultsCount.fetch_add(1, std::memory_order_relaxed)}; count > kMaxConcurrentQueries) { logPrintf(LogWarning, "Too many serialized results: count=%d, 
alloced=%d", count, res_pool.Alloced()); } } @@ -186,7 +185,7 @@ reindexer_error reindexer_ping(uintptr_t rx) { return error2c(db ? Error(errOK) : err_not_init); } -static void procces_packed_item(Item& item, int /*mode*/, int state_token, reindexer_buffer data, int format, Error& err) { +static void proccess_packed_item(Item& item, int /*mode*/, int state_token, reindexer_buffer data, int format, Error& err) { if (item.Status().ok()) { switch (format) { case FormatJson: @@ -212,7 +211,7 @@ static void procces_packed_item(Item& item, int /*mode*/, int state_token, reind reindexer_error reindexer_modify_item_packed_tx(uintptr_t rx, uintptr_t tr, reindexer_buffer args, reindexer_buffer data) { auto db = reinterpret_cast(rx); - TransactionWrapper* trw = reinterpret_cast(tr); + auto trw = reinterpret_cast(tr); if (!db) { return error2c(err_not_init); } @@ -226,12 +225,12 @@ reindexer_error reindexer_modify_item_packed_tx(uintptr_t rx, uintptr_t tr, rein int state_token = ser.GetVarUint(); Error err = err_not_init; auto item = trw->tr_.NewItem(); - procces_packed_item(item, mode, state_token, data, format, err); + proccess_packed_item(item, mode, state_token, data, format, err); if (err.code() == errTagsMissmatch) { item = db->rx.NewItem(trw->tr_.GetNsName()); err = item.Status(); if (err.ok()) { - procces_packed_item(item, mode, state_token, data, format, err); + proccess_packed_item(item, mode, state_token, data, format, err); } } if (err.ok()) { @@ -263,7 +262,7 @@ reindexer_ret reindexer_modify_item_packed(uintptr_t rx, reindexer_buffer args, Item item = rdxKeeper.db().NewItem(ns); - procces_packed_item(item, mode, state_token, data, format, err); + proccess_packed_item(item, mode, state_token, data, format, err); query_results_ptr res; if (err.ok()) { @@ -677,7 +676,7 @@ reindexer_ret reindexer_update_query(uintptr_t rx, reindexer_buffer in, reindexe reindexer_error reindexer_delete_query_tx(uintptr_t rx, uintptr_t tr, reindexer_buffer in) { auto db = 
reinterpret_cast(rx); - TransactionWrapper* trw = reinterpret_cast(tr); + auto trw = reinterpret_cast(tr); if (!db) { return error2c(err_not_init); } @@ -698,7 +697,7 @@ reindexer_error reindexer_delete_query_tx(uintptr_t rx, uintptr_t tr, reindexer_ reindexer_error reindexer_update_query_tx(uintptr_t rx, uintptr_t tr, reindexer_buffer in) { auto db = reinterpret_cast(rx); - TransactionWrapper* trw = reinterpret_cast(tr); + auto trw = reinterpret_cast(tr); if (!db) { return error2c(err_not_init); } @@ -761,7 +760,7 @@ reindexer_ret reindexer_enum_meta(uintptr_t rx, reindexer_string ns, reindexer_c out.len = ser.Len(); out.data = uintptr_t(ser.Buf()); out.results_ptr = uintptr_t(results.release()); - if (const auto count{serializedResultsCount.fetch_add(1, std::memory_order_relaxed)}; count > kMaxConcurentQueries) { + if (const auto count{serializedResultsCount.fetch_add(1, std::memory_order_relaxed)}; count > kMaxConcurrentQueries) { logPrintf(LogWarning, "Too many serialized results: count=%d, alloced=%d", count, res_pool.Alloced()); } } @@ -794,7 +793,7 @@ reindexer_ret reindexer_get_meta(uintptr_t rx, reindexer_string ns, reindexer_st out.len = results->ser.Len(); out.data = uintptr_t(results->ser.Buf()); out.results_ptr = uintptr_t(results.release()); - if (const auto count{serializedResultsCount.fetch_add(1, std::memory_order_relaxed)}; count > kMaxConcurentQueries) { + if (const auto count{serializedResultsCount.fetch_add(1, std::memory_order_relaxed)}; count > kMaxConcurrentQueries) { logPrintf(LogWarning, "Too many serialized results: count=%d, alloced=%d", count, res_pool.Alloced()); } } diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index 02ae6b95e..f7c532e8b 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -40,11 +40,12 @@ bool CJsonDecoder::decodeCJson(Payload& pl, Serializer& rdser, WrSerializer& wrs const auto& fieldRef{pl.Type().Field(field)}; const KeyValueType 
fieldType{fieldRef.Type()}; if (tagType == TAG_ARRAY) { + const carraytag atag = rdser.GetCArrayTag(); + const auto count = atag.Count(); if rx_unlikely (!fieldRef.IsArray()) { throwUnexpectedArrayError(fieldRef); } - const carraytag atag = rdser.GetCArrayTag(); - const auto count = atag.Count(); + validateArrayFieldRestrictions(fieldRef, count, "cjson"); const int ofs = pl.ResizeArray(field, count, true); const TagType atagType = atag.Type(); if (atagType != TAG_OBJECT) { @@ -61,6 +62,7 @@ bool CJsonDecoder::decodeCJson(Payload& pl, Serializer& rdser, WrSerializer& wrs wrser.PutVarUint(count); } else { validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, fieldRef, field, isInArray(), "cjson"); + validateArrayFieldRestrictions(fieldRef, 1, "cjson"); objectScalarIndexes_.set(field); pl.Set(field, cjsonValueToVariant(tagType, rdser, fieldType), true); fieldType.EvaluateOneOf( @@ -121,8 +123,8 @@ bool CJsonDecoder::decodeCJson(Payload& pl, Serializer& rdser, WrSerializer& wrs [[nodiscard]] Variant CJsonDecoder::cjsonValueToVariant(TagType tagType, Serializer& rdser, KeyValueType fieldType) { if (fieldType.Is() && tagType != TagType::TAG_STRING) { - storage_.emplace_back(rdser.GetRawVariant(KeyValueType{tagType}).As()); - return Variant(p_string(&storage_.back()), Variant::no_hold_t{}); + auto& back = storage_.emplace_back(rdser.GetRawVariant(KeyValueType{tagType}).As()); + return Variant(p_string(back), Variant::no_hold_t{}); } else { return reindexer::cjsonValueToVariant(tagType, rdser, fieldType); } diff --git a/cpp_src/core/cjson/cjsondecoder.h b/cpp_src/core/cjson/cjsondecoder.h index 747c2fd7e..7dc4ed038 100644 --- a/cpp_src/core/cjson/cjsondecoder.h +++ b/cpp_src/core/cjson/cjsondecoder.h @@ -24,8 +24,9 @@ class Recoder { class CJsonDecoder { public: - explicit CJsonDecoder(TagsMatcher& tagsMatcher, std::deque& storage) noexcept - : tagsMatcher_(tagsMatcher), storage_(storage) {} + using StrHolderT = h_vector; + + explicit CJsonDecoder(TagsMatcher& 
tagsMatcher, StrHolderT& storage) noexcept : tagsMatcher_(tagsMatcher), storage_(storage) {} class SkipFilter { public: SkipFilter MakeCleanCopy() const noexcept { return SkipFilter(); } @@ -165,7 +166,7 @@ class CJsonDecoder { int32_t arrayLevel_{0}; ScalarIndexesSetT objectScalarIndexes_; // storage for owning strings obtained from numbers - std::deque& storage_; + StrHolderT& storage_; }; extern template bool CJsonDecoder::decodeCJson( diff --git a/cpp_src/core/cjson/cjsontools.cc b/cpp_src/core/cjson/cjsontools.cc index ecce2bdc4..f08e4d8b0 100644 --- a/cpp_src/core/cjson/cjsontools.cc +++ b/cpp_src/core/cjson/cjsontools.cc @@ -207,6 +207,11 @@ void throwScalarMultipleEncodesError(const Payload& pl, const PayloadFieldType& throw Error(errLogic, "Non-array field '%s' [%d] from '%s' can only be encoded once.", f.Name(), field, pl.Type().Name()); } +void throwUnexpectedArraySizeError(std::string_view parserName, const PayloadFieldType& f, int arraySize) { + throw Error(errParams, "%s array field '%s' for this index type must contain %d elements, but got %d", parserName, f.Name(), + f.ArrayDim(), arraySize); +} + static void dumpCjsonValue(TagType type, Serializer& cjson, std::ostream& dump) { switch (type) { case TAG_VARINT: diff --git a/cpp_src/core/cjson/cjsontools.h b/cpp_src/core/cjson/cjsontools.h index 93c618524..b0a10dec2 100644 --- a/cpp_src/core/cjson/cjsontools.h +++ b/cpp_src/core/cjson/cjsontools.h @@ -18,6 +18,7 @@ void skipCjsonTag(ctag tag, Serializer& rdser, std::array [[noreturn]] void throwUnexpectedNestedArrayError(std::string_view parserName, const PayloadFieldType& f); [[noreturn]] void throwScalarMultipleEncodesError(const Payload& pl, const PayloadFieldType& f, int field); +[[noreturn]] void throwUnexpectedArraySizeError(std::string_view parserName, const PayloadFieldType& f, int arraySize); RX_ALWAYS_INLINE void validateNonArrayFieldRestrictions(const ScalarIndexesSetT& scalarIndexes, const Payload& pl, const PayloadFieldType& f, int field, 
bool isInArray, std::string_view parserName) { if (!f.IsArray()) { @@ -30,6 +31,14 @@ RX_ALWAYS_INLINE void validateNonArrayFieldRestrictions(const ScalarIndexesSetT& } } +RX_ALWAYS_INLINE void validateArrayFieldRestrictions(const PayloadFieldType& f, int arraySize, std::string_view parserName) { + if (f.IsArray()) { + if rx_unlikely (arraySize && f.ArrayDim() > 0 && f.ArrayDim() != arraySize) { + throwUnexpectedArraySizeError(parserName, f, arraySize); + } + } +} + void DumpCjson(Serializer& cjson, std::ostream& dump, const ConstPayload*, const TagsMatcher* = nullptr, std::string_view tab = " "); inline void DumpCjson(Serializer&& cjson, std::ostream& dump, const ConstPayload* pl, const TagsMatcher* tm = nullptr, std::string_view tab = " ") { @@ -49,4 +58,21 @@ inline void DumpCjson(Serializer&& cjson, std::ostream& dump, const TagsMatcher* DumpCjson(cjson, dump, tm, tab); } +static inline Variant convertValueForPayload(Payload& pl, int field, Variant&& value, std::string_view source) { + if (field < 0) { + return value; + } + + auto plFieldType = pl.Type().Field(field).Type(); + if (plFieldType.IsSame(value.Type())) { + return value; + } else if ((plFieldType.IsNumeric() && value.Type().IsNumeric()) || + (plFieldType.Is() && value.Type().Is())) { + return value.convert(pl.Type().Field(field).Type()); + } else { + throw Error(errLogic, "Error parsing %s field '%s' - got %s, expected %s", source, pl.Type().Field(field).Name(), + value.Type().Name(), plFieldType.Name()); + } +} + } // namespace reindexer diff --git a/cpp_src/core/cjson/jsondecoder.cc b/cpp_src/core/cjson/jsondecoder.cc index 7696741cb..2a0e2eba8 100644 --- a/cpp_src/core/cjson/jsondecoder.cc +++ b/cpp_src/core/cjson/jsondecoder.cc @@ -52,6 +52,7 @@ void JsonDecoder::decodeJsonObject(Payload& pl, CJsonBuilder& builder, const gas (void)subelem; ++count; } + validateArrayFieldRestrictions(f, count, "json"); int pos = pl.ResizeArray(field, count, true); for (auto& subelem : elem.value) { pl.Set(field, 
pos++, jsonValue2Variant(subelem.value, f.Type(), f.Name())); @@ -70,6 +71,7 @@ void JsonDecoder::decodeJsonObject(Payload& pl, CJsonBuilder& builder, const gas case gason::JSON_TRUE: case gason::JSON_FALSE: { validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "json"); + validateArrayFieldRestrictions(f, 1, "json"); objectScalarIndexes_.set(field); Variant value = jsonValue2Variant(elem.value, f.Type(), f.Name()); builder.Ref(tagName, value, field); @@ -150,7 +152,10 @@ class TagsPathGuard { void JsonDecoder::decodeJsonObject(const gason::JsonValue& root, CJsonBuilder& builder) { for (const auto& elem : root) { - int tagName = tagsMatcher_.name2tag(elem.key, true); + const int tagName = tagsMatcher_.name2tag(elem.key, true); + if (tagName == 0) { + throw Error(errParseJson, "Unsupported JSON format. Unnamed field detected"); + } TagsPathGuard tagsPathGuard(tagsPath_, tagName); decodeJson(nullptr, builder, elem.value, tagName, true); } diff --git a/cpp_src/core/cjson/msgpackdecoder.cc b/cpp_src/core/cjson/msgpackdecoder.cc index 8b72c9f21..0c7cf525c 100644 --- a/cpp_src/core/cjson/msgpackdecoder.cc +++ b/cpp_src/core/cjson/msgpackdecoder.cc @@ -11,22 +11,20 @@ template void MsgPackDecoder::setValue(Payload& pl, CJsonBuilder& builder, const T& value, int tagName) { int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); if (field > 0) { - validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, pl.Type().Field(field), field, isInArray(), "msgpack"); + const auto& f = pl.Type().Field(field); + validateNonArrayFieldRestrictions(objectScalarIndexes_, pl, f, field, isInArray(), "msgpack"); + if (!isInArray()) { + validateArrayFieldRestrictions(f, 1, "msgpack"); + } Variant val(value); builder.Ref(tagName, val, field); - pl.Set(field, std::move(val), true); + pl.Set(field, convertValueForPayload(pl, field, std::move(val), "msgpack")); objectScalarIndexes_.set(field); } else { builder.Put(tagName, value); } } -void 
MsgPackDecoder::iterateOverArray(const msgpack_object* begin, const msgpack_object* end, Payload& pl, CJsonBuilder& array) { - for (const msgpack_object* p = begin; p != end; ++p) { - decode(pl, array, *p, 0); - } -} - int MsgPackDecoder::decodeKeyToTag(const msgpack_object_kv& obj) { using namespace std::string_view_literals; switch (obj.key.type) { @@ -95,11 +93,43 @@ void MsgPackDecoder::decode(Payload& pl, CJsonBuilder& builder, const msgpack_ob if rx_unlikely (!f.IsArray()) { throw Error(errLogic, "Error parsing msgpack field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); } - auto& array = builder.ArrayRef(tagName, field, count); - iterateOverArray(begin, end, pl, array); + validateArrayFieldRestrictions(f, count, "msgpack"); + int pos = pl.ResizeArray(field, count, true); + for (const msgpack_object* p = begin; p != end; ++p) { + pl.Set(field, pos++, + convertValueForPayload( + pl, field, + [&] { + switch (p->type) { + case MSGPACK_OBJECT_BOOLEAN: + return Variant{p->via.boolean}; + case MSGPACK_OBJECT_POSITIVE_INTEGER: + return Variant{int64_t(p->via.u64)}; + case MSGPACK_OBJECT_NEGATIVE_INTEGER: + return Variant{p->via.i64}; + case MSGPACK_OBJECT_FLOAT32: + case MSGPACK_OBJECT_FLOAT64: + return Variant{p->via.f64}; + case MSGPACK_OBJECT_STR: + return Variant{p_string(reinterpret_cast(&p->via.str)), Variant::hold_t{}}; + case MSGPACK_OBJECT_NIL: + case MSGPACK_OBJECT_ARRAY: + case MSGPACK_OBJECT_MAP: + case MSGPACK_OBJECT_BIN: + case MSGPACK_OBJECT_EXT: + default: + throw Error(errParams, "Unsupported MsgPack array field type: %s(%d)", ToString(p->type), + int(p->type)); + } + }(), + "msgpack")); + } + builder.ArrayRef(tagName, field, count); } else { auto array = builder.Array(tagName, type); - iterateOverArray(begin, end, pl, array); + for (const msgpack_object* p = begin; p != end; ++p) { + decode(pl, array, *p, 0); + } } break; } diff --git a/cpp_src/core/cjson/msgpackdecoder.h b/cpp_src/core/cjson/msgpackdecoder.h index 
042b05263..e89f2ff8e 100644 --- a/cpp_src/core/cjson/msgpackdecoder.h +++ b/cpp_src/core/cjson/msgpackdecoder.h @@ -19,7 +19,6 @@ class MsgPackDecoder { private: void decode(Payload& pl, CJsonBuilder& builder, const msgpack_object& obj, int tagName); - void iterateOverArray(const msgpack_object* begin, const msgpack_object* end, Payload& pl, CJsonBuilder& builder); int decodeKeyToTag(const msgpack_object_kv& obj); diff --git a/cpp_src/core/cjson/protobufdecoder.cc b/cpp_src/core/cjson/protobufdecoder.cc index 7d2dcc04a..45d43d53b 100644 --- a/cpp_src/core/cjson/protobufdecoder.cc +++ b/cpp_src/core/cjson/protobufdecoder.cc @@ -1,4 +1,5 @@ #include "protobufdecoder.h" +#include "core/cjson/cjsontools.h" #include "core/schema.h" #include "estl/protobufparser.h" @@ -51,9 +52,10 @@ void ProtobufDecoder::setValue(Payload& pl, CJsonBuilder& builder, ProtobufValue if (item.isArray) { arraysStorage_.UpdateArraySize(item.tagName, field); } else { + validateArrayFieldRestrictions(f, 1, "protobuf"); builder.Ref(item.tagName, value, field); } - pl.Set(field, std::move(value), true); + pl.Set(field, convertValueForPayload(pl, field, std::move(value), "protobuf"), true); objectScalarIndexes_.set(field); } else { if (item.isArray) { @@ -78,13 +80,14 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro if (packed) { int count = 0; while (!parser.IsEof()) { - pl.Set(field, parser.ReadArrayItem(item.itemType), true); + pl.Set(field, convertValueForPayload(pl, field, parser.ReadArrayItem(item.itemType), "protobuf"), true); ++count; } builder.ArrayRef(item.tagName, field, count); } else { setValue(pl, builder, item); } + validateArrayFieldRestrictions(f, reinterpret_cast(pl.Field(field).p_)->len, "protobuf"); } else { CJsonBuilder& array = arraysStorage_.GetArray(item.tagName); if (packed) { diff --git a/cpp_src/core/cjson/protobufschemabuilder.h b/cpp_src/core/cjson/protobufschemabuilder.h index b05eae923..9d002b930 100644 --- 
a/cpp_src/core/cjson/protobufschemabuilder.h +++ b/cpp_src/core/cjson/protobufschemabuilder.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "core/key_value_type.h" #include "objtype.h" diff --git a/cpp_src/core/cjson/tagspath.h b/cpp_src/core/cjson/tagspath.h index 116a423bf..ebf9fbd32 100644 --- a/cpp_src/core/cjson/tagspath.h +++ b/cpp_src/core/cjson/tagspath.h @@ -2,11 +2,7 @@ #include #include -#include -#include - -#include "core/keyvalue/key_string.h" -#include "core/keyvalue/variant.h" +#include "estl/h_vector.h" #include "tools/customhash.h" namespace reindexer { @@ -43,16 +39,9 @@ class IndexedPathNode { int NameTag() const noexcept { return nameTag_; } int Index() const noexcept { return index_; } - std::string_view Expression() const noexcept { - if (expression_ && expression_->length() > 0) { - return std::string_view(expression_->c_str(), expression_->length()); - } - return std::string_view(); - } bool IsArrayNode() const noexcept { return (IsForAllItems() || index_ != IndexValueType::NotSet); } bool IsWithIndex() const noexcept { return index_ != ForAllItems && index_ != IndexValueType::NotSet; } - bool IsWithExpression() const noexcept { return expression_ && !expression_->empty(); } bool IsForAllItems() const noexcept { return index_ == ForAllItems; } void MarkAllItems(bool enable) noexcept { @@ -63,14 +52,6 @@ class IndexedPathNode { } } - void SetExpression(std::string_view v) { - if (expression_) { - expression_->assign(v.data(), v.length()); - } else { - expression_ = make_key_string(v.data(), v.length()); - } - } - void SetIndex(int32_t index) noexcept { index_ = index; } void SetNameTag(int16_t nameTag) noexcept { nameTag_ = nameTag; } @@ -78,7 +59,6 @@ class IndexedPathNode { enum : int32_t { ForAllItems = -2 }; int16_t nameTag_ = 0; int32_t index_ = IndexValueType::NotSet; - key_string expression_; }; template diff --git a/cpp_src/core/clusterproxy.cc b/cpp_src/core/clusterproxy.cc index e4053d49a..23632fcc8 100644 --- 
a/cpp_src/core/clusterproxy.cc +++ b/cpp_src/core/clusterproxy.cc @@ -68,7 +68,7 @@ void ClusterProxy::clientToCoreQueryResults(client::QueryResults& clientResults, localTm.deserialize(ser, itemimpl.tagsMatcher().version(), itemimpl.tagsMatcher().stateToken()); } itemimpl.Value().SetLSN(item.GetLSN()); - result.Add(ItemRef(it.itemParams_.id, itemimpl.Value(), it.itemParams_.proc, it.itemParams_.nsid, true)); + result.AddItemRef(it.itemParams_.id, itemimpl.Value(), it.itemParams_.proc, it.itemParams_.nsid, true); result.SaveRawData(std::move(itemimpl)); } } diff --git a/cpp_src/core/defnsconfigs.h b/cpp_src/core/defnsconfigs.h index ce97645f4..c8e4fa4c0 100644 --- a/cpp_src/core/defnsconfigs.h +++ b/cpp_src/core/defnsconfigs.h @@ -15,7 +15,7 @@ constexpr char kClusterConfigNamespace[] = "#clusterconfig"; const std::string_view kReplicationStatsNamespace = "#replicationstats"; constexpr char kNsNameField[] = "name"; -const std::vector kDefDBConfig = { +constexpr std::string_view kDefDBConfig[] = { R"json({ "type":"profiling", "profiling":{ @@ -61,6 +61,7 @@ const std::vector kDefDBConfig = { "max_iterations_idset_preresult":20000, "index_updates_counting_mode":false, "sync_storage_flush_limit":20000, + "strict_mode":"names", "cache":{ "index_idset_cache_size":134217728, "index_idset_hits_to_cache":2, @@ -107,7 +108,7 @@ const std::vector kDefDBConfig = { } })json"}; -const std::vector kSystemNsDefs = { +const NamespaceDef kSystemNsDefs[] = { NamespaceDef(kConfigNamespace, StorageOpts().Enabled().CreateIfMissing().DropOnFileFormatError()) .AddIndex("type", "hash", "string", IndexOpts().PK()), NamespaceDef(kPerfStatsNamespace, StorageOpts()) @@ -154,7 +155,6 @@ const std::vector kSystemNsDefs = { .AddIndex("storage_enabled", "-", "bool", IndexOpts().Dense()) .AddIndex("storage_status", "-", "string", IndexOpts().Dense()) .AddIndex("storage_path", "-", "string", IndexOpts().Dense()) - .AddIndex("storage_loaded", "-", "bool", IndexOpts().Dense()) 
.AddIndex("optimization_completed", "-", "bool", IndexOpts().Dense()) .AddIndex("query_cache.total_size", "-", "int64", IndexOpts().Dense()) .AddIndex("query_cache.items_count", "-", "int64", IndexOpts().Dense()) diff --git a/cpp_src/core/formatters/key_string_fmt.h b/cpp_src/core/formatters/key_string_fmt.h index 59d0086df..765b78efd 100644 --- a/cpp_src/core/formatters/key_string_fmt.h +++ b/cpp_src/core/formatters/key_string_fmt.h @@ -11,7 +11,7 @@ struct fmt::printf_formatter { } template auto format(const reindexer::key_string& s, ContextT& ctx) const { - return s ? fmt::format_to(ctx.out(), "{}", std::string_view(*s)) : fmt::format_to(ctx.out(), ""); + return s ? fmt::format_to(ctx.out(), "{}", std::string_view(s)) : fmt::format_to(ctx.out(), ""); } }; @@ -19,6 +19,6 @@ template <> struct fmt::formatter : public fmt::formatter { template auto format(const reindexer::key_string& s, ContextT& ctx) const { - return s ? fmt::formatter::format(std::string_view(*s), ctx) : fmt::format_to(ctx.out(), ""); + return s ? 
fmt::formatter::format(std::string_view(s), ctx) : fmt::format_to(ctx.out(), ""); } }; diff --git a/cpp_src/core/formatters/namespacesname_fmt.h b/cpp_src/core/formatters/namespacesname_fmt.h index b94febbf0..4d42adfbb 100644 --- a/cpp_src/core/formatters/namespacesname_fmt.h +++ b/cpp_src/core/formatters/namespacesname_fmt.h @@ -14,3 +14,11 @@ struct fmt::printf_formatter { return fmt::format_to(ctx.out(), "{}", name.OriginalName()); } }; + +template <> +struct fmt::formatter : public fmt::formatter { + template + auto format(const reindexer::NamespaceName& name, ContextT& ctx) const { + return fmt::formatter::format(name.OriginalName(), ctx); + } +}; diff --git a/cpp_src/core/ft/ft_fast/dataholder.h b/cpp_src/core/ft/ft_fast/dataholder.h index 360a274e5..39b93ffff 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.h +++ b/cpp_src/core/ft/ft_fast/dataholder.h @@ -50,17 +50,16 @@ class PackedWordEntry { class WordEntry { public: WordEntry() noexcept = default; - WordEntry(const IdRelSet& _vids, bool _virtualWord) : vids(_vids), virtualWord(_virtualWord) {} + WordEntry(const IdRelSet& _vids) : vids_(_vids) {} WordEntry(const WordEntry&) = delete; WordEntry(WordEntry&&) noexcept = default; WordEntry& operator=(const WordEntry&) = delete; WordEntry& operator=(WordEntry&&) noexcept = default; // Explicit copy - WordEntry MakeCopy() const { return WordEntry(this->vids, this->virtualWord); } + WordEntry MakeCopy() const { return WordEntry(vids_); } - IdRelSet vids; - bool virtualWord = false; + IdRelSet vids_; }; enum ProcessStatus { FullRebuild, RecommitLast, CreateNew }; diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.cc b/cpp_src/core/ft/ft_fast/dataprocessor.cc index e2ea2733e..3cfe4bce7 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.cc +++ b/cpp_src/core/ft/ft_fast/dataprocessor.cc @@ -15,8 +15,6 @@ using std::chrono::milliseconds; namespace reindexer { -constexpr int kDigitUtfSizeof = 1; - template void DataProcessor::Process(bool multithread) { 
ExceptionPtrWrapper exwr; @@ -78,7 +76,6 @@ typename DataProcessor::WordsVector DataProcessor::insertIntoSuf auto& suffix = holder.GetSuffix(); suffix.reserve(words_um.size() * 20, words_um.size()); - const bool enableNumbersSearch = holder.cfg_->enableNumbersSearch; WordsVector found; found.reserve(words_um.size()); @@ -97,11 +94,7 @@ typename DataProcessor::WordsVector DataProcessor::insertIntoSuf words.emplace_back(); pos = holder.BuildWordId(id); - if (enableNumbersSearch && keyIt.second.virtualWord) { - suffix.insert(keyIt.first, pos, kDigitUtfSizeof); - } else { - suffix.insert(keyIt.first, pos); - } + suffix.insert(keyIt.first, pos); } return found; } @@ -128,9 +121,9 @@ size_t DataProcessor::commitIdRelSets(const WordsVector& preprocWords, w idsetcnt += sizeof(*wIt); } - word->vids.insert(word->vids.end(), std::make_move_iterator(keyIt->second.vids.begin()), - std::make_move_iterator(keyIt->second.vids.end())); - keyIt->second.vids = IdRelSet(); + word->vids.insert(word->vids.end(), std::make_move_iterator(keyIt->second.vids_.begin()), + std::make_move_iterator(keyIt->second.vids_.end())); + keyIt->second.vids_ = IdRelSet(); word->vids.shrink_to_fit(); idsetcnt += word->vids.heap_size(); } @@ -242,7 +235,7 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea // build words map parallel in maxIndexWorkers threads auto worker = [this, &ctxs, &vdocsTexts, offset, fieldscount, &cfg, &vdocs, &textSplitter](int i) { auto ctx = &ctxs[i]; - std::vector virtualWords; + std::vector virtualWords; const size_t start = ctx->from; const size_t fin = ctx->to; const bool enableNumbersSearch = cfg->enableNumbersSearch; @@ -273,7 +266,7 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea auto [idxIt, emplaced] = ctx->words_um.try_emplace_prehashed(whash, word); (void)emplaced; - const int mfcnt = idxIt->second.vids.Add(vdocId, insertPos, rfield); + const int mfcnt = idxIt->second.vids_.Add(vdocId, insertPos, rfield); if (mfcnt > 
vdoc.mostFreqWordCount[rfield]) { vdoc.mostFreqWordCount[rfield] = mfcnt; } @@ -304,19 +297,18 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea #if defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) const auto fBeforeMove = it.first; const auto sBeforeMove = it.second.MakeCopy(); - const auto sCapacityBeforeMove = it.second.vids.capacity(); + const auto sCapacityBeforeMove = it.second.vids_.capacity(); #endif // defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) auto [idxIt, emplaced] = words_um.try_emplace(std::move(it.first), std::move(it.second)); if (!emplaced) { #if defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) // Make sure, that try_emplace did not moved the values assertrx(it.first == fBeforeMove); - assertrx(it.second.virtualWord == sBeforeMove.virtualWord); - assertrx(it.second.vids.size() == sBeforeMove.vids.size()); - assertrx(it.second.vids.capacity() == sCapacityBeforeMove); + assertrx(it.second.vids_.size() == sBeforeMove.vids_.size()); + assertrx(it.second.vids_.capacity() == sCapacityBeforeMove); #endif // defined(RX_WITH_STDLIB_DEBUG) || defined(REINDEX_WITH_ASAN) - auto& resultVids = idxIt->second.vids; - auto& newVids = it.second.vids; + auto& resultVids = idxIt->second.vids_; + auto& newVids = it.second.vids_; resultVids.insert(resultVids.end(), std::make_move_iterator(newVids.begin()), std::make_move_iterator(newVids.end())); } } @@ -349,8 +341,8 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea if (holder_.cfg_->logLevel >= LogInfo) { WrSerializer out; for (auto& w : words_um) { - if (w.second.vids.size() > vdocs.size() / 5 || int64_t(w.second.vids.size()) > holder_.cfg_->mergeLimit) { - out << w.first << "(" << w.second.vids.size() << ") "; + if (w.second.vids_.size() > vdocs.size() / 5 || int64_t(w.second.vids_.size()) > holder_.cfg_->mergeLimit) { + out << w.first << "(" << w.second.vids_.size() << ") "; } } logPrintf(LogInfo, "Total documents: %d. 
Potential stop words (with corresponding docs count): %s", vdocs.size(), out.Slice()); @@ -361,19 +353,17 @@ size_t DataProcessor::buildWordsMap(words_map& words_um, bool multithrea template void DataProcessor::buildVirtualWord(std::string_view word, words_map& words_um, VDocIdType docType, int rfield, size_t insertPos, - std::vector& container) { + std::vector& container) { auto& vdoc(holder_.vdocs_[docType]); NumToText::convert(word, container); - for (std::string& numberWord : container) { + for (const auto numberWord : container) { WordEntry wentry; - wentry.virtualWord = true; - auto idxIt = words_um.emplace(std::move(numberWord), std::move(wentry)).first; - const int mfcnt = idxIt->second.vids.Add(docType, insertPos, rfield); + auto idxIt = words_um.emplace(numberWord, std::move(wentry)).first; + const int mfcnt = idxIt->second.vids_.Add(docType, insertPos, rfield); if (mfcnt > vdoc.mostFreqWordCount[rfield]) { vdoc.mostFreqWordCount[rfield] = mfcnt; } ++vdoc.wordsCount[rfield]; - insertPos += kDigitUtfSizeof; } } diff --git a/cpp_src/core/ft/ft_fast/dataprocessor.h b/cpp_src/core/ft/ft_fast/dataprocessor.h index 50fe679fe..936e435b3 100644 --- a/cpp_src/core/ft/ft_fast/dataprocessor.h +++ b/cpp_src/core/ft/ft_fast/dataprocessor.h @@ -86,7 +86,7 @@ class DataProcessor { [[nodiscard]] size_t buildWordsMap(words_map& m, bool multithread, intrusive_ptr textSplitter); void buildVirtualWord(std::string_view word, words_map& words_um, VDocIdType docType, int rfield, size_t insertPos, - std::vector& container); + std::vector& container); void buildTyposMap(uint32_t startPos, const WordsVector& preprocWords); [[nodiscard]] static WordsVector insertIntoSuffix(words_map& words_um, DataHolder& holder); [[nodiscard]] static size_t commitIdRelSets(const WordsVector& preprocWords, words_map& words_um, DataHolder& holder, diff --git a/cpp_src/core/ft/ft_fast/frisochartypes.cc b/cpp_src/core/ft/ft_fast/frisochartypes.cc index 220c34699..14c72e520 100644 --- 
a/cpp_src/core/ft/ft_fast/frisochartypes.cc +++ b/cpp_src/core/ft/ft_fast/frisochartypes.cc @@ -88,14 +88,15 @@ bool FrisoCharTypes::utf8_numeric_string(std::string_view str) { int bytes = 1; while (*s != '\0') { - if (*s < 0) { // full-width chars. + const char c = *s; + if (c & (1 << 7)) { // full-width chars. const char* it = s; int u = utf8::unchecked::next(it); bytes = it - s; if (u < 65296 || u > 65305) { return false; } - } else if (*s < 48 || *s > 57) { + } else if (c < 48 || c > 57) { return false; } s += bytes; @@ -113,12 +114,13 @@ bool FrisoCharTypes::utf8_decimal_string(std::string_view str) { } for (i = 1; i < len; bytes = 1) { + const char c = str[i]; // count the number of char '.' - if (str[i] == '.') { + if (c == '.') { i++; p++; continue; - } else if (str[i] < 0) { + } else if (c & (1 << 7)) { // full-width numeric. const char* s = &str[0] + i; u = utf8::unchecked::next(s); @@ -126,7 +128,7 @@ bool FrisoCharTypes::utf8_decimal_string(std::string_view str) { if (u < 65296 || u > 65305) { return false; } - } else if (str[i] < 48 || str[i] > 57) { + } else if (c < 48 || c > 57) { return false; } diff --git a/cpp_src/core/ft/ft_fast/selecter.cc b/cpp_src/core/ft/ft_fast/selecter.cc index 8684ac34d..14d2d4795 100644 --- a/cpp_src/core/ft/ft_fast/selecter.cc +++ b/cpp_src/core/ft/ft_fast/selecter.cc @@ -966,7 +966,7 @@ void Selector::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } if (!curMergedPos.posTmp.empty()) { present[vid] = true; - double normDist = bound(1.0 / minDist, holder_.cfg_->distanceWeight, holder_.cfg_->distanceBoost); + double normDist = bound(1.0 / (minDist < 1 ? 
1 : minDist), holder_.cfg_->distanceWeight, holder_.cfg_->distanceBoost); int finalRank = normDist * termRank; //'rank' of the current subTerm is greater than the previous subTerm, update the overall 'rank' and save the rank of the // subTerm for possible diff --git a/cpp_src/core/ft/ftsetcashe.h b/cpp_src/core/ft/ftsetcashe.h index ed1ea8f74..9500b779b 100644 --- a/cpp_src/core/ft/ftsetcashe.h +++ b/cpp_src/core/ft/ftsetcashe.h @@ -10,11 +10,13 @@ struct FtIdSetCacheVal { FtIdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} FtIdSetCacheVal(IdSet::Ptr&& i, FtCtxData::Ptr&& c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} size_t Size() const noexcept { return ids ? (sizeof(*ids.get()) + ids->heap_size()) : 0; } + bool IsInitialized() const noexcept { return bool(ids); } IdSet::Ptr ids; FtCtxData::Ptr ctx; }; -using FtIdSetCache = LRUCache; +using FtIdSetCache = + LRUCache, LRUWithAtomicPtr::No>; } // namespace reindexer diff --git a/cpp_src/core/ft/numtotext.cc b/cpp_src/core/ft/numtotext.cc index 44fe81df2..b7405f653 100644 --- a/cpp_src/core/ft/numtotext.cc +++ b/cpp_src/core/ft/numtotext.cc @@ -1,163 +1,100 @@ #include "numtotext.h" - -#include -#include -#include #include "tools/errors.h" namespace reindexer { constexpr std::string_view units[] = {"", "один", "два", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; -constexpr std::string_view unitsNominat[] = {"", "одна", "две"}; +constexpr std::string_view unitsNominat[] = {"", "одна", "две", "три", "четыре", "пять", "шесть", "семь", "восемь", "девять"}; constexpr std::string_view tens[] = {"", "одиннадцать", "двенадцать", "тринадцать", "четырнадцать", "пятнадцать", "шестнадцать", "семнадцать", "восемнадцать", "девятнадцать"}; constexpr std::string_view decades[] = {"", "десять", "двадцать", "тридцать", "сорок", "пятьдесят", "шестьдесят", "семьдесят", "восемьдесят", "девяносто"}; constexpr std::string_view hundreads[] = {"", "сто", "двести", "триста", "четыреста", "пятьсот", 
"шестьсот", "семьсот", "восемьсот", "девятьсот"}; -constexpr std::string_view thousands[] = {"тысяча", "тысячи", "тысяч"}; -constexpr std::string_view millions[] = {"миллион", "миллиона", "миллионов"}; -constexpr std::string_view billions[] = {"миллиард", "миллиарда", "миллиардов"}; -constexpr std::string_view trillions[] = {"триллион", "триллиона", "триллионов"}; -constexpr std::string_view quadrillion[] = {"квадриллион", "квадриллиона", "квадриллионов"}; -constexpr std::string_view quintillion[] = {"квинтиллион", "квинтиллиона", "квинтиллионов"}; -constexpr std::string_view sextillion[] = {"секстиллион", "секстиллиона", "секстиллионов"}; -constexpr std::string_view septillion[] = {"септиллион", "септиллиона", "септиллионов"}; -enum Numorders : int { Thousands, Millions, Billions, Trillions, Quadrillion, Quintillion, Sextillion, Septillion }; +// clang-format off +constexpr static std::string_view kNumOrders[][10] = { +// 0 1 2 3 4 5 6 7 8 9 + {"тысяч", "тысяча", "тысячи", "тысячи", "тысячи", "тысяч", "тысяч", "тысяч", "тысяч", "тысяч"}, + {"миллионов", "миллион", "миллиона", "миллиона", "миллиона", "миллионов", "миллионов", "миллионов", "миллионов", "миллионов"}, + {"миллиардов", "миллиард", "миллиарда", "миллиарда", "миллиарда", "миллиардов", "миллиардов", "миллиардов", "миллиардов", "миллиардов"}, + {"триллионов", "триллион", "триллиона", "триллиона", "триллиона", "триллионов", "триллионов", "триллионов", "триллионов", "триллионов"}, + {"квадриллионов", "квадриллион", "квадриллиона", "квадриллиона", "квадриллиона", "квадриллионов", "квадриллионов", "квадриллионов", "квадриллионов", "квадриллионов"}, + {"квинтиллионов", "квинтиллион", "квинтиллиона", "квинтиллиона", "квинтиллиона", "квинтиллионов", "квинтиллионов", "квинтиллионов", "квинтиллионов", "квинтиллионов"}, + {"секстиллионов", "секстиллион", "секстиллиона", "секстиллиона", "секстиллиона", "секстиллионов", "секстиллионов", "секстиллионов", "секстиллионов", "секстиллионов"}, + {"септиллионов", 
"септиллион", "септиллиона", "септиллиона", "септиллиона", "септиллионов", "септиллионов", "септиллионов", "септиллионов", "септиллионов"}}; +// clang-format on +RX_ALWAYS_INLINE static int ansiCharacterToDigit(char ch) noexcept { return static_cast(ch - 48); } -static std::string_view getNumorder(int numorder, int i) { - switch (numorder) { - case Thousands: - return thousands[i]; - case Millions: - return millions[i]; - case Billions: - return billions[i]; - case Trillions: - return trillions[i]; - case Quadrillion: - return quadrillion[i]; - case Quintillion: - return quintillion[i]; - case Sextillion: - return sextillion[i]; - case Septillion: - return septillion[i]; - default: - throw Error(errParams, "Incorrect order [%s]: too big", numorder); +static std::vector& formTextString(std::string_view str, std::vector& words) { + if (str.empty()) { + return words; } -} - -RX_ALWAYS_INLINE int ansiCharacterToDigit(char ch) noexcept { return static_cast(ch - 48); } - -static std::vector getOrders(std::string_view str) { - std::string numStr(str); - std::reverse(numStr.begin(), numStr.end()); - int numChars = numStr.length(); - std::vector orders; - orders.reserve(numChars / 3); - for (int i = 0; i < numChars; i += 3) { - std::string tempString; - if (i <= numChars - 3) { - tempString += numStr[i + 2]; - tempString += numStr[i + 1]; - tempString += numStr[i]; - } else { - int lostChars = numChars - i; - switch (lostChars) { - case 1: - tempString = numStr[i]; - break; - case 2: - tempString += numStr[i + 1]; - tempString += numStr[i]; - break; - default: - throw Error(errLogic, "Unexpected lost characters number: %d", lostChars); - } + unsigned int ordersMax = (str.length() - 1) / 3 + 1; + unsigned int orderDigitCount = str.length() - (ordersMax - 1) * 3; + unsigned int baseOffset = 0; + for (int k = ordersMax; k > 0; k--) { + unsigned int hundreadsIndx = 0; + unsigned int tenIndex = 0; + unsigned int numIndex = 0; + switch (orderDigitCount) { + case 1: + numIndex = 
ansiCharacterToDigit(str[baseOffset]); + break; + case 2: + tenIndex = ansiCharacterToDigit(str[baseOffset]); + numIndex = ansiCharacterToDigit(str[baseOffset + 1]); + break; + case 3: + hundreadsIndx = ansiCharacterToDigit(str[baseOffset]); + tenIndex = ansiCharacterToDigit(str[baseOffset + 1]); + numIndex = ansiCharacterToDigit(str[baseOffset + 2]); + break; + default: + throw Error(errLogic, "Incorrect orderDigitCount %d", orderDigitCount); + } + if (hundreadsIndx != 0) { + words.emplace_back(hundreads[hundreadsIndx]); } - orders.emplace_back(std::move(tempString)); - } - return orders; -} - -static std::vector getDecimal(const std::string& str, int i) { - std::vector words; - int v = std::stoi(str); - if (v < 10) { - words.emplace_back(units[v]); - } else if (v % 10 == 0) { - words.emplace_back(decades[v / 10]); - } else if (v < 20) { - words.emplace_back(tens[v % 10]); - } else if (v % 10 < 3 && i == 1) { - words.emplace_back(decades[ansiCharacterToDigit(str[0])]); - words.emplace_back(unitsNominat[ansiCharacterToDigit(str[1])]); - } else { - words.emplace_back(decades[ansiCharacterToDigit(str[0])]); - words.emplace_back(units[ansiCharacterToDigit(str[1])]); - } - return words; -} -static std::string getNumOrders(int i, int num) { - std::string orders; - if (i > 0) { - if (num % 10 > 4 || (num % 100 > 10 && num % 100 < 20) || num % 10 == 0) { - orders = getNumorder(i - 1, 2); - } else if (num % 10 > 1 && num % 10 < 5) { - orders = getNumorder(i - 1, 1); - } else { - orders = getNumorder(i - 1, 0); + if (tenIndex == 1 && numIndex != 0) { + words.emplace_back(tens[numIndex]); + } else if (tenIndex != 0) { + words.emplace_back(decades[tenIndex]); } - } - return orders; -} -static std::vector formTextString(const std::string& str, int i) { - std::vector words; - int strlen = str.length(); - if (strlen == 3) { - words.emplace_back(hundreads[ansiCharacterToDigit(str[0])]); - std::string decimal; - decimal += str[1]; - decimal += str[2]; - std::vector 
decimalWords(getDecimal(decimal, i)); - words.insert(words.end(), make_move_iterator(decimalWords.begin()), make_move_iterator(decimalWords.end())); - } else if (strlen == 2) { - words = getDecimal(str, i); - } else { - if ((i == 1) && std::stoi(str) < 3) { - words.emplace_back(unitsNominat[std::stoi(str)]); - } else { - words.emplace_back(units[std::stoi(str)]); + if (numIndex != 0 && tenIndex != 1) { + if (k == 2) { // thousands + words.emplace_back(unitsNominat[numIndex]); + } else { + words.emplace_back(units[numIndex]); + } } - } - if (i > 0) { - words.emplace_back(getNumOrders(i, std::stoi(str))); + bool isAllNull = hundreadsIndx == 0 && tenIndex == 0 && numIndex == 0; + if (k > 1 && !isAllNull) { + words.emplace_back(kNumOrders[k - 2][numIndex]); + } + baseOffset += orderDigitCount; + orderDigitCount = 3; } return words; } -std::vector& NumToText::convert(std::string_view str, std::vector& output) { +std::vector& NumToText::convert(std::string_view str, std::vector& output) { output.resize(0); - if ((str.length() == 1) && (str[0] == '0')) { - output = {"ноль"}; - return output; + unsigned int k = 0; + for (; k < str.length() && str[k] == '0'; ++k) { + output.emplace_back("ноль"); } + str = str.substr(k); // unreasonably big if (str.length() > 27) { + output.resize(0); return output; } - std::vector orders(getOrders(str)); - for (size_t i = 0; i < orders.size(); ++i) { - size_t oppositeSideIndex = orders.size() - 1 - i; - std::vector digits(formTextString(orders[oppositeSideIndex], oppositeSideIndex)); - output.insert(output.end(), make_move_iterator(digits.begin()), make_move_iterator(digits.end())); - } - return output; + + return formTextString(str, output); } } // namespace reindexer diff --git a/cpp_src/core/ft/numtotext.h b/cpp_src/core/ft/numtotext.h index db87cbd38..2c0a5b755 100644 --- a/cpp_src/core/ft/numtotext.h +++ b/cpp_src/core/ft/numtotext.h @@ -10,7 +10,7 @@ namespace reindexer { class NumToText { public: - static std::vector& 
convert(std::string_view numStr, std::vector& output); + static std::vector& convert(std::string_view numStr, std::vector& output); }; } // namespace reindexer diff --git a/cpp_src/core/ft/stopwords/stop_en.cc b/cpp_src/core/ft/stopwords/stop_en.cc index 8caf56a90..d04a1daa2 100644 --- a/cpp_src/core/ft/stopwords/stop_en.cc +++ b/cpp_src/core/ft/stopwords/stop_en.cc @@ -1,15 +1,16 @@ namespace reindexer { const char* stop_words_en[] = { - "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", - "aren", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", - "by", "can", "cannot", "could", "couldn", "did", "didn", "do", "does", "doesn", "doing", "dont", - "down", "during", "each", "few", "for", "from", "further", "had", "hadnt", "has", "hasnt", "have", - "havent", "having", "he", "hed", "hell", "hes", "her", "here", "hers", "herself", "him", "himself", - "his", "how", "hows", "i", "id", "im", "if", "in", "into", "is", "it", "its", - "itself", "me", "more", "most", "must", "my", "myself", "no", "nor", "not", "of", "off", - "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", - "same", "she", "should", "so", "some", "such", "than", "that", "the", "their", "theirs", "them", - "themselves", "then", "there", "these", "they", "this", "those", "through", "to", "too", "under", "until", - "up", "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", - "why", "with", "would", "you", "your", "yours", "yourself", "yourselves", nullptr}; + "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", + "aren", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", + "by", "can", "cannot", "could", "couldn", "did", "didn", "do", "does", "doesn", "doing", "dont", + "down", "during", "each", "few", "for", "from", "further", "had", "hadn", "hadnt", "has", "hasn", + 
"hasnt", "have", "havent", "having", "he", "hed", "hell", "hes", "her", "here", "hers", "herself", + "him", "himself", "his", "how", "hows", "i", "id", "im", "if", "in", "into", "is", + "it", "its", "itself", "ll", "me", "more", "most", "must", "my", "myself", "no", "nor", + "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", + "out", "over", "own", "same", "she", "should", "so", "some", "such", "than", "that", "the", + "their", "theirs", "them", "themselves", "then", "there", "these", "they", "this", "those", "through", "to", + "too", "under", "until", "up", "ve", "very", "was", "we", "were", "what", "when", "where", + "which", "while", "who", "whom", "why", "with", "will", "would", "you", "your", "yours", "yourself", + "yourselves", nullptr}; } diff --git a/cpp_src/core/ft/stopwords/stop_ru.cc b/cpp_src/core/ft/stopwords/stop_ru.cc index 659bfd905..217501036 100644 --- a/cpp_src/core/ft/stopwords/stop_ru.cc +++ b/cpp_src/core/ft/stopwords/stop_ru.cc @@ -1,353 +1,31 @@ namespace reindexer { const char* stop_words_ru[] = { - // clang-format off - "а", - "е", - "и", - "ж", - "м", - "о", - "на", - "не", - "ни", - "об", - "но", - "он", - "мне", - "мои", - "мож", - "она", - "они", - "оно", - "мной", - "много", - "мною", - "мой", - "мог", - "могут", - "можно", - "может", - "моя", - "моё", - "мочь", - "над", - "нее", - "оба", - "нам", - "нем", - "нами", - "ними", - "мимо", - "одной", - "одного", - "менее", - "однажды", - "однако", - "меня", - "нему", - "меньше", - "ней", - "наверху", - "него", - "ниже", - "мало", - "надо", - "назад", - "недавно", - "миллионов", - "недалеко", - "между", - "низко", - "нельзя", - "нибудь", - "наконец", - "никогда", - "никуда", - "нас", - "наш", - "нет", - "нею", - "неё", - "них", - "наша", - "наше", - "наши", - "ничего", - "нередко", - "обычно", - "опять", - "около", - "мы", - "ну", - "нх", - "от", - "нужно", - "очень", - "отсюда", - "в", - "во", - "вон", - "вниз", - "внизу", - "вокруг", - "вот", - 
"вверх", - "вам", - "вами", - "важное", - "важная", - "важные", - "важный", - "вдали", - "везде", - "ведь", - "вас", - "ваш", - "ваша", - "ваше", - "ваши", - "впрочем", - "весь", - "вдруг", - "вы", - "все", - "всем", - "всеми", - "всему", - "всего", - "всегда", - "всех", - "всею", - "всю", - "вся", - "всё", - "всюду", - "год", - "года", - "году", - "где", - "да", - "ее", - "за", - "из", - "ли", - "же", - "им", - "до", - "по", - "ими", - "под", - "иногда", - "довольно", - "именно", - "долго", - "позже", - "более", - "должно", - "пожалуйста", - "значит", - "иметь", - "больше", - "пока", - "ему", - "имя", - "пор", - "пора", - "потом", - "потому", - "после", - "почему", - "почти", - "посреди", - "ей", - "его", - "дел", - "или", - "без", - "день", - "занят", - "занята", - "занято", - "заняты", - "давно", - "даже", - "алло", - "жизнь", - "далеко", - "близко", - "здесь", - "дальше", - "для", - "лет", - "зато", - "даром", - "перед", - "затем", - "зачем", - "лишь", - "ею", - "её", - "их", - "бы", - "еще", - "при", - "был", - "про", - "против", - "просто", - "бывает", - "бывь", - "если", - "люди", - "была", - "были", - "было", - "будем", - "будет", - "будете", - "будешь", - "буду", - "будь", - "будто", - "будут", - "ещё", - "друго", - "другое", - "другой", - "другие", - "другая", - "других", - "есть", - "быть", - "лучше", - "к", - "ком", - "конечно", - "кому", - "кого", - "когда", - "которой", - "которого", - "которая", - "которые", - "который", - "которых", - "кем", - "каждое", - "каждая", - "каждые", - "каждый", - "кажется", - "как", - "какой", - "какая", - "кто", - "кроме", - "куда", - "кругом", - "с", - "у", - "я", - "та", - "те", - "уж", - "со", - "то", - "том", - "снова", - "тому", - "совсем", - "того", - "тогда", - "тоже", - "собой", - "тобой", - "собою", - "тобою", - "сначала", - "только", - "уметь", - "тот", - "тою", - "хорошо", - "хотеть", - "хочешь", - "хоть", - "хотя", - "свое", - "свои", - "твой", - "своей", - "своего", - "своих", - "свою", - "твоя", - "твоё", - 
"раз", - "уже", - "сам", - "там", - "тем", - "чем", - "сама", - "сами", - "теми", - "само", - "рано", - "самом", - "самому", - "самой", - "самого", - "самим", - "самими", - "самих", - "саму", - "чему", - "чего", - "себе", - "тебе", - "разве", - "теперь", - "себя", - "тебя", - "спасибо", - "слишком", - "так", - "такое", - "такой", - "такие", - "также", - "такая", - "сих", - "тех", - "чаще", - "через", - "часто", - "сколько", - "ту", - "ты", - "эта", - "эти", - "что", - "это", - "чтоб", - "этом", - "этому", - "этой", - "этого", - "чтобы", - "этот", - "стал", - "туда", - "этим", - "этими", - "рядом", - "этих", - "тут", - "эту", - "суть", - "чуть", - "тысяч", - nullptr}; -// clang-format on + "а", "и", "ж", "о", "на", "не", "ни", "об", "но", "он", "мне", "мои", + "мож", "она", "они", "оно", "мной", "много", "мною", "мой", "мог", "могут", "можно", "может", + "моя", "моё", "мочь", "над", "нее", "оба", "нам", "нем", "нами", "ними", "мимо", "одной", + "одного", "менее", "однажды", "однако", "меня", "нему", "меньше", "ней", "него", "ниже", "мало", "надо", + "назад", "между", "низко", "нельзя", "нибудь", "наконец", "никогда", "никуда", "нас", "наш", "нет", "нею", + "неё", "них", "наша", "наше", "наши", "ничего", "опять", "около", "мы", "ну", "нх", "от", + "нужно", "очень", "отсюда", "в", "во", "вон", "вниз", "внизу", "вот", "вверх", "вам", "вами", + "вдали", "везде", "ведь", "вас", "ваш", "ваша", "ваше", "ваши", "впрочем", "весь", "вдруг", "вы", + "все", "всем", "всеми", "всему", "всего", "всегда", "всех", "всею", "всю", "вся", "всё", "всюду", + "год", "года", "году", "где", "да", "ее", "за", "из", "ли", "же", "им", "до", + "по", "ими", "под", "иногда", "довольно", "именно", "долго", "позже", "более", "должно", "пожалуйста", "значит", + "иметь", "больше", "пока", "ему", "имя", "пор", "пора", "потом", "потому", "после", "почему", "почти", + "посреди", "ей", "его", "дел", "или", "без", "день", "давно", "даже", "алло", "жизнь", "далеко", + "близко", "здесь", "дальше", "для", 
"лет", "зато", "даром", "перед", "затем", "зачем", "лишь", "ею", + "её", "их", "бы", "еще", "при", "был", "про", "против", "просто", "бывает", "бывь", "если", + "люди", "была", "были", "было", "будем", "будет", "будете", "будешь", "буду", "будь", "будто", "будут", + "ещё", "друго", "другое", "другой", "другие", "другая", "других", "есть", "быть", "лучше", "к", "ком", + "конечно", "кому", "кого", "когда", "которой", "которого", "которая", "которые", "который", "которых", "кем", "каждое", + "каждая", "каждые", "каждый", "кажется", "как", "какой", "какая", "кто", "кроме", "куда", "кругом", "с", + "у", "я", "та", "те", "уж", "со", "то", "том", "снова", "тому", "совсем", "того", + "тогда", "тоже", "собой", "тобой", "собою", "тобою", "сначала", "только", "уметь", "тот", "тою", "хорошо", + "хотеть", "хочешь", "хоть", "хотя", "свое", "свои", "твой", "своей", "своего", "своих", "свою", "твоя", + "твоё", "раз", "уже", "сам", "там", "тем", "чем", "сама", "сами", "теми", "само", "самом", + "самому", "самой", "самого", "самим", "самими", "самих", "саму", "чему", "чего", "себе", "тебе", "разве", + "теперь", "себя", "тебя", "спасибо", "слишком", "так", "такое", "такой", "такие", "также", "такая", "сих", + "тех", "чаще", "через", "часто", "сколько", "ту", "ты", "эта", "эти", "что", "это", "чтоб", + "этом", "этому", "этой", "этого", "чтобы", "этот", "стал", "туда", "этим", "этими", "рядом", "этих", + "тут", "эту", "суть", "чуть", nullptr}; } // namespace reindexer diff --git a/cpp_src/core/idsetcache.h b/cpp_src/core/idsetcache.h index fcc57d6a0..62efdd654 100644 --- a/cpp_src/core/idsetcache.h +++ b/cpp_src/core/idsetcache.h @@ -61,6 +61,7 @@ struct IdSetCacheVal { IdSetCacheVal() = default; IdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} size_t Size() const noexcept { return ids ? 
(sizeof(*ids.get()) + ids->heap_size()) : 0; } + bool IsInitialized() const noexcept { return bool(ids); } IdSet::Ptr ids; }; @@ -87,10 +88,12 @@ struct hash_idset_cache_key { size_t operator()(const IdSetCacheKey& s) const noexcept { return (size_t(s.cond) << 8) ^ (size_t(s.sort) << 16) ^ s.keys->Hash(); } }; -using IdSetCacheBase = LRUCache; +using IdSetCacheBase = + LRUCache, LRUWithAtomicPtr::Yes>; class IdSetCache : public IdSetCacheBase { public: + IdSetCache() = default; IdSetCache(size_t sizeLimit, uint32_t hitCount) : IdSetCacheBase(sizeLimit, hitCount) {} void ClearSorted(const std::bitset& s) { if (s.any()) { diff --git a/cpp_src/core/index/index.cc b/cpp_src/core/index/index.cc index c2fa3a9a9..1e6e52c95 100644 --- a/cpp_src/core/index/index.cc +++ b/cpp_src/core/index/index.cc @@ -61,7 +61,16 @@ std::unique_ptr Index::New(const IndexDef& idef, PayloadType&& payloadTyp case IndexUuidHash: return IndexUuid_New(idef, std::move(payloadType), std::move(fields), cacheCfg); } - throw Error(errParams, "Ivalid index type %d for index '%s'", idef.Type(), idef.name_); + throw Error(errParams, "Invalid index type %d for index '%s'", idef.Type(), idef.name_); +} + +IndexPerfStat Index::GetIndexPerfStat() { + return IndexPerfStat(name_, selectPerfCounter_.Get(), commitPerfCounter_.Get()); +} + +void Index::ResetIndexPerfStat() { + this->selectPerfCounter_.Reset(); + this->commitPerfCounter_.Reset(); } template diff --git a/cpp_src/core/index/index.h b/cpp_src/core/index/index.h index 7e4875c6e..7750b0c67 100644 --- a/cpp_src/core/index/index.h +++ b/cpp_src/core/index/index.h @@ -118,13 +118,8 @@ class Index { PerfStatCounterMT& GetSelectPerfCounter() { return selectPerfCounter_; } PerfStatCounterMT& GetCommitPerfCounter() { return commitPerfCounter_; } - IndexPerfStat GetIndexPerfStat() { - return IndexPerfStat(name_, selectPerfCounter_.Get(), commitPerfCounter_.Get()); - } - void ResetIndexPerfStat() { - selectPerfCounter_.Reset(); - commitPerfCounter_.Reset(); - 
} + virtual IndexPerfStat GetIndexPerfStat(); + virtual void ResetIndexPerfStat(); virtual bool HoldsStrings() const noexcept = 0; virtual void DestroyCache() {} virtual void ClearCache() {} @@ -139,7 +134,7 @@ class Index { protected: // Index type. Can be one of enum IndexType IndexType type_; - // Name of index (usualy name of field). + // Name of index (usually name of field). std::string name_; // Vector or ids, sorted by this index. Available only for ordered indexes std::vector sortOrders_; @@ -160,7 +155,7 @@ class Index { PerfStatCounterMT selectPerfCounter_; KeyValueType keyType_ = KeyValueType::Undefined{}; KeyValueType selectKeyType_ = KeyValueType::Undefined{}; - // Count of sorted indexes in namespace to resereve additional space in idsets + // Count of sorted indexes in namespace to reserve additional space in idsets int sortedIdxCount_ = 0; bool isBuilt_{false}; diff --git a/cpp_src/core/index/indexfastupdate.cc b/cpp_src/core/index/indexfastupdate.cc new file mode 100644 index 000000000..51f4dcc23 --- /dev/null +++ b/cpp_src/core/index/indexfastupdate.cc @@ -0,0 +1,66 @@ +#include "core/index/indexfastupdate.h" +#include "core/formatters/namespacesname_fmt.h" +#include "core/index/index.h" +#include "core/namespace/namespaceimpl.h" +#include "tools/logger.h" + +namespace reindexer { +bool IndexFastUpdate::Try(NamespaceImpl& ns, const IndexDef& from, const IndexDef& to) { + if (RelaxedEqual(from, to)) { + logFmt(LogInfo, "[{}]:{} Start fast update index '{}'", ns.name_, ns.wal_.GetServer(), from.name_); + + const auto idxNo = ns.indexesNames_.find(from.name_)->second; + auto& index = ns.indexes_[idxNo]; + auto newIndex = Index::New(to, PayloadType(index->GetPayloadType()), FieldsSet{index->Fields()}, ns.config_.cacheConfig); + VariantArray keys, resKeys; + for (size_t rowId = 0; rowId < ns.items_.size(); ++rowId) { + if (ns.items_[rowId].IsFree()) { + continue; + } + + bool needClearCache = false; + ConstPayload(ns.payloadType_, 
ns.items_[rowId]).Get(idxNo, keys); + newIndex->Upsert(resKeys, keys, rowId, needClearCache); + } + if (index->IsOrdered()) { + auto indexesCacheCleaner{ns.GetIndexesCacheCleaner()}; + indexesCacheCleaner.Add(index->SortId()); + } + + index = std::move(newIndex); + + ns.updateSortedIdxCount(); + ns.markUpdated(IndexOptimization::Full); + + logFmt(LogInfo, "[{}]:{} Index '{}' successfully updated using a fast strategy", ns.name_, ns.wal_.GetServer(), from.name_); + + return true; + } + return false; +} + +bool IndexFastUpdate::RelaxedEqual(const IndexDef& from, const IndexDef& to) noexcept { + if (!isLegalTypeTransform(from.Type(), to.Type())) { + return false; + } + auto comparisonIndex = from; + comparisonIndex.indexType_ = to.indexType_; + comparisonIndex.opts_.Dense(to.opts_.IsDense()); + comparisonIndex.opts_.SetCollateMode(to.opts_.GetCollateMode()); + comparisonIndex.opts_.SetCollateSortOrder(to.opts_.GetCollateSortOrder()); + return comparisonIndex.IsEqual(to, IndexComparison::Full); +} + +bool IndexFastUpdate::isLegalTypeTransform(IndexType from, IndexType to) noexcept { + return std::find_if(kTransforms.begin(), kTransforms.end(), [from, to](const auto& set) { + return set.find(from) != set.end() && set.find(to) != set.end(); + }) != kTransforms.end(); +} +const std::vector> IndexFastUpdate::kTransforms = { + {IndexType::IndexIntBTree, IndexType::IndexIntHash, IndexType::IndexIntStore}, + {IndexType::IndexInt64BTree, IndexType::IndexInt64Hash, IndexType::IndexInt64Store}, + {IndexType::IndexStrBTree, IndexType::IndexStrHash, IndexType::IndexStrStore}, + {IndexType::IndexDoubleStore, IndexType::IndexDoubleBTree}, + {IndexType::IndexUuidStore, IndexType::IndexUuidHash}, +}; +} // namespace reindexer diff --git a/cpp_src/core/index/indexfastupdate.h b/cpp_src/core/index/indexfastupdate.h new file mode 100644 index 000000000..03aba3636 --- /dev/null +++ b/cpp_src/core/index/indexfastupdate.h @@ -0,0 +1,18 @@ +#include "core/type_consts.h" +#include 
"estl/fast_hash_set.h" + +namespace reindexer { + +class NamespaceImpl; +struct IndexDef; + +struct IndexFastUpdate { + static bool Try(NamespaceImpl& ns, const IndexDef& from, const IndexDef& to); + static bool RelaxedEqual(const IndexDef& from, const IndexDef& to) noexcept; + +private: + static bool isLegalTypeTransform(IndexType from, IndexType to) noexcept; + static const std::vector> kTransforms; +}; + +} // namespace reindexer \ No newline at end of file diff --git a/cpp_src/core/index/indexordered.cc b/cpp_src/core/index/indexordered.cc index aa824e012..7fd7172b1 100644 --- a/cpp_src/core/index/indexordered.cc +++ b/cpp_src/core/index/indexordered.cc @@ -1,4 +1,3 @@ - #include "indexordered.h" #include "core/nsselecter/btreeindexiterator.h" #include "core/rdxcontext.h" @@ -11,7 +10,7 @@ template Variant IndexOrdered::Upsert(const Variant& key, IdType id, bool& clearCache) { if (key.Type().Is()) { if (this->empty_ids_.Unsorted().Add(id, IdSet::Auto, this->sortedIdxCount_)) { - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; } @@ -29,7 +28,7 @@ Variant IndexOrdered::Upsert(const Variant& key, IdType id, bool& clearCache) if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? 
IdSet::Ordered : IdSet::Auto, this->sortedIdxCount_)) { this->isBuilt_ = false; - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; } this->tracker_.markUpdated(this->idx_map, keyIt); @@ -49,7 +48,7 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray& keys, CondType c // Get set of keys or single key if (!IsOrderedCondition(condition)) { if (opts.unbuiltSortOrders && keys.size() > 1) { - throw Error(errLogic, "Attemt to use btree index '%s' for sort optimization with unordered multivalue condition (%s)", + throw Error(errLogic, "Attempt to use btree index '%s' for sort optimization with unordered multivalued condition (%s)", this->Name(), CondTypeToStr(condition)); } return IndexUnordered::SelectKey(keys, condition, sortId, opts, ctx, rdxCtx); @@ -140,15 +139,15 @@ SelectKeyResults IndexOrdered::SelectKey(const VariantArray& keys, CondType c T* i_map; SortType sortId; typename T::iterator startIt, endIt; - } ctx = {&this->idx_map, sortId, startIt, endIt}; + } selectorCtx = {&this->idx_map, sortId, startIt, endIt}; - auto selector = [&ctx, count](SelectKeyResult& res, size_t& idsCount) { + auto selector = [&selectorCtx, count](SelectKeyResult& res, size_t& idsCount) { idsCount = 0; res.reserve(count); - for (auto it = ctx.startIt; it != ctx.endIt; ++it) { - assertrx_dbg(it != ctx.i_map->end()); + for (auto it = selectorCtx.startIt; it != selectorCtx.endIt; ++it) { + assertrx_dbg(it != selectorCtx.i_map->end()); idsCount += it->second.Unsorted().Size(); - res.emplace_back(it->second, ctx.sortId); + res.emplace_back(it->second, selectorCtx.sortId); } res.deferedExplicitSort = false; return false; @@ -201,8 +200,7 @@ void IndexOrdered::MakeSortOrders(UpdateSortedContext& ctx) { } } } - // fill unexist indexs - + // fill non-existent indexs for (auto it = ids2Sorts.begin(); it != ids2Sorts.end(); ++it) { if (*it == SortIdUnfilled) { *it = idx; diff --git a/cpp_src/core/index/indexstore.cc b/cpp_src/core/index/indexstore.cc index 
1fc21dcad..8bbbe522a 100644 --- a/cpp_src/core/index/indexstore.cc +++ b/cpp_src/core/index/indexstore.cc @@ -23,14 +23,15 @@ void IndexStore::Delete(const Variant& key, IdType /*id*/, StringsHo return; } auto keyIt = str_map.find(std::string_view(key)); - // assertf(keyIt != str_map.end(), "Delete unexists key from index '%s' id=%d", name_, id); + // assertf(keyIt != str_map.end(), "Delete non-existent key from index '%s' id=%d", name_, id); if (keyIt == str_map.end()) { return; } assertrx_dbg(keyIt->second > 0); if ((keyIt->second--) == 1) { - const auto strSize = sizeof(*keyIt->first.get()) + keyIt->first->heap_size(); - memStat_.dataSize -= sizeof(unordered_str_map::value_type) + strSize; + const auto strSize = keyIt->first.heap_size(); + const auto staticSizeApproximate = size_t(float(sizeof(unordered_str_map::value_type)) / str_map.max_load_factor()); + memStat_.dataSize -= staticSizeApproximate + strSize; strHolder.Add(std::move(keyIt->first), strSize); str_map.template erase(keyIt); } @@ -70,11 +71,11 @@ Variant IndexStore::Upsert(const Variant& key, IdType id, bool& /*cl keyIt = str_map.find(std::string_view(key)); if (keyIt == str_map.end()) { keyIt = str_map.emplace(static_cast(key), 0).first; - // sizeof(key_string) + heap of string - memStat_.dataSize += sizeof(unordered_str_map::value_type) + sizeof(*keyIt->first.get()) + keyIt->first->heap_size(); + const auto staticSizeApproximate = size_t(float(sizeof(unordered_str_map::value_type)) / str_map.max_load_factor()); + memStat_.dataSize += staticSizeApproximate + keyIt->first.heap_size(); } ++(keyIt->second); - val = (*keyIt->first); + val = keyIt->first; } else { val = std::string_view(key); } @@ -127,11 +128,11 @@ SelectKeyResults IndexStore::SelectKey(const VariantArray& keys, CondType con const BaseFunctionCtx::Ptr& /*ctx*/, const RdxContext& rdxCtx) { const auto indexWard(rdxCtx.BeforeIndexWork()); if (condition == CondEmpty && !this->opts_.IsArray() && !this->opts_.IsSparse()) { - throw 
Error(errParams, "The 'is NULL' condition is suported only by 'sparse' or 'array' indexes"); + throw Error(errParams, "The 'is NULL' condition is supported only by 'sparse' or 'array' indexes"); } if (condition == CondAny && !this->opts_.IsArray() && !this->opts_.IsSparse() && !sopts.distinct) { - throw Error(errParams, "The 'NOT NULL' condition is suported only by 'sparse' or 'array' indexes"); + throw Error(errParams, "The 'NOT NULL' condition is supported only by 'sparse' or 'array' indexes"); } return ComparatorIndexed{ diff --git a/cpp_src/core/index/indextext/fastindextext.cc b/cpp_src/core/index/indextext/fastindextext.cc index ade4732b6..1ae446e89 100644 --- a/cpp_src/core/index/indextext/fastindextext.cc +++ b/cpp_src/core/index/indextext/fastindextext.cc @@ -60,9 +60,7 @@ Variant FastIndexText::Upsert(const Variant& key, IdType id, bool& clearCache } if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? IdSet::Ordered : IdSet::Auto, 0)) { this->isBuilt_ = false; - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); clearCache = true; } this->addMemStat(keyIt); @@ -88,8 +86,8 @@ void FastIndexText::Delete(const Variant& key, IdType id, StringsHolder& strH int delcnt = keyIt->second.Unsorted().Erase(id); (void)delcnt; // TODO: we have to implement removal of composite indexes (doesn't work right now) - assertf(this->opts_.IsArray() || this->Opts().IsSparse() || delcnt, "Delete unexists id from index '%s' id=%d,key=%s", this->name_, id, - key.As()); + assertf(this->opts_.IsArray() || this->Opts().IsSparse() || delcnt, "Delete non-existent id from index '%s' id=%d,key=%s", this->name_, + id, key.As()); if (keyIt->second.Unsorted().IsEmpty()) { this->tracker_.markDeleted(keyIt); @@ -107,9 +105,7 @@ void FastIndexText::Delete(const Variant& key, IdType id, StringsHolder& strH } else { this->addMemStat(keyIt); } - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); clearCache = true; } @@ -119,12 
+115,12 @@ IndexMemStat FastIndexText::GetMemStat(const RdxContext& ctx) { contexted_shared_lock lck(this->mtx_, ctx); ret.fulltextSize = this->holder_->GetMemStat(); - ret.idsetCache = this->cache_ft_ ? this->cache_ft_->GetMemStat() : LRUCacheMemStat(); + ret.idsetCache = this->cache_ft_.GetMemStat(); return ret; } template template -typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& releventDocs, +typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& relevantDocs, int& cnt) { if (md.empty()) { return md.begin(); @@ -144,7 +140,7 @@ typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, i auto& vdoc = holder.vdocs_[first->id]; assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); - ++releventDocs; + ++relevantDocs; ++first; } @@ -161,7 +157,7 @@ typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, i auto& vdoc = holder.vdocs_[last->id]; assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); - ++releventDocs; + ++relevantDocs; *first = std::move(*last); ++first; @@ -183,10 +179,10 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS const double scalingFactor = mergeData.maxRank > 255 ? 
255.0 / mergeData.maxRank : 1.0; const int minRelevancy = getConfig()->minRelevancy * 100 * scalingFactor; - size_t releventDocs = 0; + size_t relevantDocs = 0; switch (ftSortType) { case FtSortType::RankAndID: { - auto itF = unstableRemoveIf(mergeData, minRelevancy, scalingFactor, releventDocs, cnt); + auto itF = unstableRemoveIf(mergeData, minRelevancy, scalingFactor, relevantDocs, cnt); mergeData.erase(itF, mergeData.end()); break; } @@ -199,7 +195,7 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS } assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); - ++releventDocs; + ++relevantDocs; } break; } @@ -211,13 +207,13 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS mergedIds->reserve(cnt); if constexpr (std::is_same_v) { if (useExternSt == FtUseExternStatuses::No) { - appendMergedIds(mergeData, releventDocs, + appendMergedIds(mergeData, relevantDocs, [&fctx, &mergedIds](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc); mergedIds->Append(ebegin, eend, IdSet::Unordered); }); } else { - appendMergedIds(mergeData, releventDocs, + appendMergedIds(mergeData, relevantDocs, [&fctx, &mergedIds, &statuses](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc, statuses.rowIds); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); @@ -225,7 +221,7 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS } } else if constexpr (std::is_same_v, MergeType> || std::is_same_v, MergeType>) { if (useExternSt == FtUseExternStatuses::No) { - appendMergedIds(mergeData, releventDocs, + appendMergedIds(mergeData, relevantDocs, [&fctx, &mergedIds, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); 
mergedIds->Append(ebegin, eend, IdSet::Unordered); @@ -233,7 +229,7 @@ IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtS } else { appendMergedIds( - mergeData, releventDocs, + mergeData, relevantDocs, [&fctx, &mergedIds, &statuses, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { fctx.Add(ebegin, eend, vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); @@ -480,9 +476,9 @@ void FastIndexText::buildVdocs(Container& data) { template template -RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(MergeType& mergeData, size_t releventDocs, F&& appender) { +RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(MergeType& mergeData, size_t relevantDocs, F&& appender) { auto& holder = *this->holder_; - for (size_t i = 0; i < releventDocs; i++) { + for (size_t i = 0; i < relevantDocs; i++) { auto& vid = mergeData[i]; auto& vdoc = holder.vdocs_[vid.id]; appender(vdoc.keyEntry->Sorted(0).begin(), vdoc.keyEntry->Sorted(0).end(), vid); @@ -525,17 +521,13 @@ void FastIndexText::SetOpts(const IndexOpts& opts) { this->holder_->Clear(); } this->holder_->status_ = FullRebuild; - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); for (auto& idx : this->idx_map) { idx.second.SetVDocID(FtKeyEntryData::ndoc); } } else { logPrintf(LogInfo, "FulltextIndex config changed, cache cleared"); - if (this->cache_ft_) { - this->cache_ft_->Clear(); - } + this->cache_ft_.Clear(); } this->holder_->synonyms_->SetConfig(&newCfg); } diff --git a/cpp_src/core/index/indextext/fastindextext.h b/cpp_src/core/index/indextext/fastindextext.h index 3cb5127a7..4cac98953 100644 --- a/cpp_src/core/index/indextext/fastindextext.h +++ b/cpp_src/core/index/indextext/fastindextext.h @@ -66,9 +66,9 @@ class FastIndexText : public IndexText { template void buildVdocs(Data& data); template - void appendMergedIds(MergeType& merged, 
size_t releventDocs, F&& appender); + void appendMergedIds(MergeType& merged, size_t relevantDocs, F&& appender); template - typename MergeType::iterator unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& releventDocs, int& cnt); + typename MergeType::iterator unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& relevantDocs, int& cnt); std::unique_ptr holder_; }; diff --git a/cpp_src/core/index/indextext/fieldsgetter.h b/cpp_src/core/index/indextext/fieldsgetter.h index bce1e0579..ab26ed7cf 100644 --- a/cpp_src/core/index/indextext/fieldsgetter.h +++ b/cpp_src/core/index/indextext/fieldsgetter.h @@ -1,6 +1,8 @@ #pragma once #include "core/ft/usingcontainer.h" +#include "core/keyvalue/key_string.h" #include "core/payload/fieldsset.h" +#include "core/payload/payloadiface.h" #include "vendor/utf8cpp/utf8/core.h" namespace reindexer { @@ -10,16 +12,16 @@ class FieldsGetter { FieldsGetter(const FieldsSet& fields, const PayloadType& plt, KeyValueType type) : fields_(fields), plt_(plt), type_(type) {} RVector, 8> getDocFields(const key_string& doc, std::vector>&) { - if (!utf8::is_valid(doc->cbegin(), doc->cend())) { + if (!utf8::is_valid(doc.cbegin(), doc.cend())) { throw Error(errParams, "Invalid UTF8 string in FullText index"); } - return {{std::string_view(*doc.get()), 0}}; + return {{std::string_view(doc), 0}}; } VariantArray krefs; - // Specific implemetation for composite index + // Specific implementation for composite index RVector, 8> getDocFields(const PayloadValue& doc, std::vector>& strsBuf) { ConstPayload pl(plt_, doc); diff --git a/cpp_src/core/index/indextext/indextext.cc b/cpp_src/core/index/indextext/indextext.cc index e1e5db9c0..f7fc46af3 100644 --- a/cpp_src/core/index/indextext/indextext.cc +++ b/cpp_src/core/index/indextext/indextext.cc @@ -10,13 +10,14 @@ namespace reindexer { template IndexText::IndexText(const IndexText& other) : IndexUnordered(other), - 
cache_ft_(std::make_unique(other.cacheMaxSize_, other.hitsToCache_)), + cache_ft_(other.cacheMaxSize_, other.hitsToCache_), cacheMaxSize_(other.cacheMaxSize_), hitsToCache_(other.hitsToCache_) { + cache_ft_.CopyInternalPerfStatsFrom(other.cache_ft_); initSearchers(); } -// Generic implemetation for string index +// Generic implementation for string index template void IndexText::initSearchers() { size_t jsonPathIdx = 0; @@ -36,7 +37,7 @@ void IndexText::initSearchers() { throw Error(errParams, "Composite fulltext index '%s' contains duplicated fields", this->name_); } if rx_unlikely (ftFields_.size() > kMaxFtCompositeFields) { - throw Error(errParams, "Unable to create composite fulltext '%s' index with %d fields. Fileds count limit is %d", this->name_, + throw Error(errParams, "Unable to create composite fulltext '%s' index with %d fields. Fields count limit is %d", this->name_, ftFields_.size(), kMaxFtCompositeFields); } } @@ -64,13 +65,26 @@ void IndexText::ReconfigureCache(const NamespaceCacheConfigData& cacheCfg) { if (cacheMaxSize_ != cacheCfg.ftIdxCacheSize || hitsToCache_ != cacheCfg.ftIdxHitsToCache) { cacheMaxSize_ = cacheCfg.ftIdxCacheSize; hitsToCache_ = cacheCfg.ftIdxHitsToCache; - if (cache_ft_) { - cache_ft_ = std::make_unique(cacheMaxSize_, hitsToCache_); + if (cache_ft_.IsActive()) { + cache_ft_.Reinitialize(cacheMaxSize_, hitsToCache_); } } Base::ReconfigureCache(cacheCfg); } +template +IndexPerfStat IndexText::GetIndexPerfStat() { + auto stats = Base::GetIndexPerfStat(); + stats.cache = cache_ft_.GetPerfStat(); + return stats; +} + +template +void IndexText::ResetIndexPerfStat() { + Base::ResetIndexPerfStat(); + cache_ft_.ResetPerfStat(); +} + template void IndexText::build(const RdxContext& rdxCtx) { smart_lock lck(mtx_, rdxCtx); @@ -84,7 +98,7 @@ void IndexText::build(const RdxContext& rdxCtx) { } } -// Generic implemetation for string index +// Generic implementation for string index template SelectKeyResults IndexText::SelectKey(const 
VariantArray& keys, CondType condition, SortType, Index::SelectOpts opts, const BaseFunctionCtx::Ptr& ctx, const RdxContext& rdxCtx) { @@ -96,9 +110,9 @@ SelectKeyResults IndexText::SelectKey(const VariantArray& keys, CondType cond auto mergeStatuses = this->GetFtMergeStatuses(rdxCtx); bool needPutCache = false; IdSetCacheKey ckey{keys, condition, 0}; - auto cache_ft = cache_ft_->Get(ckey); + auto cache_ft = cache_ft_.Get(ckey); if (cache_ft.valid) { - if (!cache_ft.val.ids) { + if (!cache_ft.val.IsInitialized()) { needPutCache = true; } else if (ctx->type == BaseFunctionCtx::CtxType::kFtArea && (!cache_ft.val.ctx || !(cache_ft.val.ctx->type == BaseFunctionCtx::CtxType::kFtArea))) { @@ -170,7 +184,7 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray& keys, const std:: } } if (need_put && mergedIds->size()) { - cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(ftCtxDataBase)}); + cache_ft_.Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(ftCtxDataBase)}); } res.emplace_back(std::move(mergedIds)); diff --git a/cpp_src/core/index/indextext/indextext.h b/cpp_src/core/index/indextext/indextext.h index 3a0af5a12..cf20caaf7 100644 --- a/cpp_src/core/index/indextext/indextext.h +++ b/cpp_src/core/index/indextext/indextext.h @@ -1,6 +1,5 @@ #pragma once -#include #include "core/ft/config/baseftconfig.h" #include "core/ft/filters/itokenfilter.h" #include "core/ft/ft_fast/dataholder.h" @@ -21,7 +20,7 @@ class IndexText : public IndexUnordered { IndexText(const IndexText& other); IndexText(const IndexDef& idef, PayloadType&& payloadType, FieldsSet&& fields, const NamespaceCacheConfigData& cacheCfg) : IndexUnordered(idef, std::move(payloadType), std::move(fields), cacheCfg), - cache_ft_(std::make_unique(cacheCfg.ftIdxCacheSize, cacheCfg.ftIdxHitsToCache)), + cache_ft_(cacheCfg.ftIdxCacheSize, cacheCfg.ftIdxHitsToCache), cacheMaxSize_(cacheCfg.ftIdxCacheSize), hitsToCache_(cacheCfg.ftIdxHitsToCache) { this->selectKeyType_ = 
KeyValueType::String{}; @@ -42,25 +41,25 @@ class IndexText : public IndexUnordered { // Rebuild will be done on first select } void CommitFulltext() override final { - cache_ft_ = std::make_unique(cacheMaxSize_, hitsToCache_); + cache_ft_.Reinitialize(cacheMaxSize_, hitsToCache_); commitFulltextImpl(); this->isBuilt_ = true; } void SetSortedIdxCount(int) override final {} void DestroyCache() override { Base::DestroyCache(); - cache_ft_.reset(); + cache_ft_.ResetImpl(); } void ClearCache() override { Base::ClearCache(); - if (cache_ft_) { - cache_ft_->Clear(); - } + cache_ft_.Clear(); } void ClearCache(const std::bitset& s) override { Base::ClearCache(s); } void MarkBuilt() noexcept override { assertrx(0); } bool IsFulltext() const noexcept override final { return true; } void ReconfigureCache(const NamespaceCacheConfigData& cacheCfg) override final; + IndexPerfStat GetIndexPerfStat() override final; + void ResetIndexPerfStat() override final; protected: using Mutex = MarkedMutex; @@ -76,7 +75,7 @@ class IndexText : public IndexUnordered { void initSearchers(); FieldsGetter Getter(); - std::unique_ptr cache_ft_; + FtIdSetCache cache_ft_; size_t cacheMaxSize_; uint32_t hitsToCache_; diff --git a/cpp_src/core/index/indexunordered.cc b/cpp_src/core/index/indexunordered.cc index 2e62a8621..96493c972 100644 --- a/cpp_src/core/index/indexunordered.cc +++ b/cpp_src/core/index/indexunordered.cc @@ -120,11 +120,12 @@ template IndexUnordered::IndexUnordered(const IndexUnordered& other) : Base(other), idx_map(other.idx_map), - cache_(nullptr), cacheMaxSize_(other.cacheMaxSize_), hitsToCache_(other.hitsToCache_), empty_ids_(other.empty_ids_), - tracker_(other.tracker_) {} + tracker_(other.tracker_) { + cache_.CopyInternalPerfStatsFrom(other.cache_); +} template size_t heap_size(const key_type& /*kt*/) { @@ -133,12 +134,12 @@ size_t heap_size(const key_type& /*kt*/) { template <> size_t heap_size(const key_string& kt) { - return kt->heap_size() + sizeof(*kt.get()); + return 
kt.heap_size(); } template <> size_t heap_size(const key_string_with_hash& kt) { - return kt->heap_size() + sizeof(*kt.get()); + return kt.heap_size(); } struct DeepClean { @@ -175,7 +176,7 @@ Variant IndexUnordered::Upsert(const Variant& key, IdType id, bool& clearCach // reset cache if (key.Type().Is()) { // TODO maybe error or default value if the index is not sparse if (this->empty_ids_.Unsorted().Add(id, IdSet::Auto, this->sortedIdxCount_)) { - cache_.reset(); + cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; } @@ -191,7 +192,7 @@ Variant IndexUnordered::Upsert(const Variant& key, IdType id, bool& clearCach } if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? IdSet::Ordered : IdSet::Auto, this->sortedIdxCount_)) { - cache_.reset(); + cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; } @@ -207,7 +208,7 @@ void IndexUnordered::Delete(const Variant& key, IdType id, StringsHolder& str if (key.Type().Is()) { this->empty_ids_.Unsorted().Erase(id); // ignore result this->isBuilt_ = false; - cache_.reset(); + cache_.ResetImpl(); clearCache = true; return; } @@ -218,7 +219,7 @@ void IndexUnordered::Delete(const Variant& key, IdType id, StringsHolder& str delMemStat(keyIt); delcnt = keyIt->second.Unsorted().Erase(id); this->isBuilt_ = false; - cache_.reset(); + cache_.ResetImpl(); clearCache = true; } assertf(delcnt || this->opts_.IsArray() || this->Opts().IsSparse(), "Delete non-existing id from index '%s' id=%d,key=%s (%s)", @@ -254,23 +255,23 @@ template bool IndexUnordered::tryIdsetCache(const VariantArray& keys, CondType condition, SortType sortId, const std::function& selector, SelectKeyResult& res) { size_t idsCount; - if (!cache_ || IsComposite(this->Type())) { + if (!cache_.IsActive() || IsComposite(this->Type())) { selector(res, idsCount); return false; } bool scanWin = false; IdSetCacheKey ckey{keys, condition, sortId}; - auto cached = cache_->Get(ckey); + auto cached = cache_.Get(ckey); if (cached.valid) { - if 
(!cached.val.ids) { + if (!cached.val.IsInitialized()) { scanWin = selector(res, idsCount); if (!scanWin) { // Do not use generic sort, when expecting duplicates in the id sets const bool useGenericSort = res.deferedExplicitSort && !(this->opts_.IsArray() && (condition == CondEq || condition == CondSet)); - cache_->Put(ckey, - res.MergeIdsets(SelectKeyResult::MergeOptions{.genericSort = useGenericSort, .shrinkResult = true}, idsCount)); + cache_.Put(ckey, + res.MergeIdsets(SelectKeyResult::MergeOptions{.genericSort = useGenericSort, .shrinkResult = true}, idsCount)); } } else { res.emplace_back(std::move(cached.val.ids)); @@ -294,7 +295,7 @@ SelectKeyResults IndexUnordered::SelectKey(const VariantArray& keys, CondType switch (condition) { case CondEmpty: if (!this->opts_.IsArray() && !this->opts_.IsSparse()) { - throw Error(errParams, "The 'is NULL' condition is suported only by 'sparse' or 'array' indexes"); + throw Error(errParams, "The 'is NULL' condition is supported only by 'sparse' or 'array' indexes"); } res.emplace_back(this->empty_ids_, sortId); break; @@ -410,8 +411,8 @@ template void IndexUnordered::Commit() { this->empty_ids_.Unsorted().Commit(); - if (!cache_) { - cache_.reset(new IdSetCache(cacheMaxSize_, hitsToCache_)); + if (!cache_.IsActive()) { + cache_.Reinitialize(cacheMaxSize_, hitsToCache_); } if (!tracker_.isUpdated()) { @@ -454,17 +455,28 @@ void IndexUnordered::SetSortedIdxCount(int sortedIdxCount) { } } +template +IndexPerfStat IndexUnordered::GetIndexPerfStat() { + auto stats = Base::GetIndexPerfStat(); + stats.cache = cache_.GetPerfStat(); + return stats; +} + +template +void IndexUnordered::ResetIndexPerfStat() { + Base::ResetIndexPerfStat(); + cache_.ResetPerfStat(); +} + template IndexMemStat IndexUnordered::GetMemStat(const RdxContext& ctx) { IndexMemStat ret = Base::GetMemStat(ctx); ret.uniqKeysCount = idx_map.size(); - if (cache_) { - ret.idsetCache = cache_->GetMemStat(); - } + ret.idsetCache = cache_.GetMemStat(); 
ret.trackedUpdatesCount = tracker_.updatesSize(); ret.trackedUpdatesBuckets = tracker_.updatesBuckets(); ret.trackedUpdatesSize = tracker_.allocated(); - ret.trackedUpdatesOveflow = tracker_.overflow(); + ret.trackedUpdatesOverflow = tracker_.overflow(); return ret; } @@ -490,11 +502,7 @@ void IndexUnordered::dump(S& os, std::string_view step, std::string_view offs os << '\n' << newOffset; } os << "},\n" << newOffset << "cache: "; - if (cache_) { - cache_->Dump(os, step, newOffset); - } else { - os << "empty"; - } + cache_.Dump(os, step, newOffset); os << ",\n" << newOffset << "empty_ids: "; empty_ids_.Dump(os, step, newOffset); os << "\n" << offset << '}'; @@ -513,8 +521,8 @@ void IndexUnordered::ReconfigureCache(const NamespaceCacheConfigData& cacheCf if (cacheMaxSize_ != cacheCfg.idxIdsetCacheSize || hitsToCache_ != cacheCfg.idxIdsetHitsToCache) { cacheMaxSize_ = cacheCfg.idxIdsetCacheSize; hitsToCache_ = cacheCfg.idxIdsetHitsToCache; - if (cache_) { - cache_.reset(new IdSetCache(cacheMaxSize_, hitsToCache_)); + if (cache_.IsActive()) { + cache_.Reinitialize(cacheMaxSize_, hitsToCache_); } } } diff --git a/cpp_src/core/index/indexunordered.h b/cpp_src/core/index/indexunordered.h index 418931c9e..f8dbfae26 100644 --- a/cpp_src/core/index/indexunordered.h +++ b/cpp_src/core/index/indexunordered.h @@ -5,7 +5,6 @@ #include "core/idsetcache.h" #include "core/index/indexstore.h" #include "core/index/updatetracker.h" -#include "estl/atomic_unique_ptr.h" namespace reindexer { @@ -29,8 +28,8 @@ class IndexUnordered : public IndexStore> { IndexUnordered(const IndexDef& idef, PayloadType&& payloadType, FieldsSet&& fields, const NamespaceCacheConfigData& cacheCfg); IndexUnordered(const IndexUnordered& other); - Variant Upsert(const Variant& key, IdType id, bool& chearCache) override; - void Delete(const Variant& key, IdType id, StringsHolder&, bool& chearCache) override; + Variant Upsert(const Variant& key, IdType id, bool& clearCache) override; + void Delete(const Variant& 
key, IdType id, StringsHolder&, bool& clearCache) override; SelectKeyResults SelectKey(const VariantArray& keys, CondType cond, SortType stype, Index::SelectOpts opts, const BaseFunctionCtx::Ptr& ctx, const RdxContext&) override; void Commit() override; @@ -39,18 +38,12 @@ class IndexUnordered : public IndexStore> { IndexMemStat GetMemStat(const RdxContext&) override; size_t Size() const noexcept override final { return idx_map.size(); } void SetSortedIdxCount(int sortedIdxCount) override; + IndexPerfStat GetIndexPerfStat() override; + void ResetIndexPerfStat() override; bool HoldsStrings() const noexcept override; - void DestroyCache() override { cache_.reset(); } - void ClearCache() override { - if (cache_) { - cache_->Clear(); - } - } - void ClearCache(const std::bitset& s) override { - if (cache_) { - cache_->ClearSorted(s); - } - } + void DestroyCache() override { cache_.ResetImpl(); } + void ClearCache() override { cache_.Clear(); } + void ClearCache(const std::bitset& s) override { cache_.ClearSorted(s); } void Dump(std::ostream& os, std::string_view step = " ", std::string_view offset = "") const override { dump(os, step, offset); } void EnableUpdatesCountingMode(bool val) noexcept override { tracker_.enableCountingMode(val); } @@ -66,7 +59,7 @@ class IndexUnordered : public IndexStore> { // Index map T idx_map; // Merged idsets cache - atomic_unique_ptr cache_; + IdSetCache cache_; size_t cacheMaxSize_; uint32_t hitsToCache_; // Empty ids diff --git a/cpp_src/core/index/rtree/indexrtree.cc b/cpp_src/core/index/rtree/indexrtree.cc index 63aa4a188..26b8331d2 100644 --- a/cpp_src/core/index/rtree/indexrtree.cc +++ b/cpp_src/core/index/rtree/indexrtree.cc @@ -82,7 +82,7 @@ void IndexRTree::Upsert(VariantArra if (keyIt->second.Unsorted().Add(id, this->opts_.IsPK() ? 
IdSet::Ordered : IdSet::Auto, this->sortedIdxCount_)) { this->isBuilt_ = false; // reset cache - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; } this->tracker_.markUpdated(this->idx_map, keyIt); @@ -105,7 +105,7 @@ void IndexRTree::Delete(const Varia if (keyIt == this->idx_map.end()) { return; } - this->cache_.reset(); + this->cache_.ResetImpl(); clearCache = true; this->isBuilt_ = false; @@ -113,7 +113,7 @@ void IndexRTree::Delete(const Varia delcnt = keyIt->second.Unsorted().Erase(id); (void)delcnt; // TODO: we have to implement removal of composite indexes (doesn't work right now) - assertf(this->Opts().IsSparse() || delcnt, "Delete unexists id from index '%s' id=%d,key=%s (%s)", this->name_, id, + assertf(this->Opts().IsSparse() || delcnt, "Delete non-existent id from index '%s' id=%d,key=%s (%s)", this->name_, id, Variant(keys).template As(this->payloadType_, this->Fields()), Variant(keyIt->first).As(this->payloadType_, this->Fields())); diff --git a/cpp_src/core/index/string_map.h b/cpp_src/core/index/string_map.h index 2f7d0a616..5ed267fa8 100644 --- a/cpp_src/core/index/string_map.h +++ b/cpp_src/core/index/string_map.h @@ -14,13 +14,13 @@ struct less_key_string { less_key_string(const CollateOpts& collateOpts = CollateOpts()) : collateOpts_(collateOpts) {} bool operator()(const key_string& lhs, const key_string& rhs) const noexcept { - return collateCompare(*lhs, *rhs, collateOpts_) == ComparationResult::Lt; + return collateCompare(lhs, rhs, collateOpts_) == ComparationResult::Lt; } bool operator()(std::string_view lhs, const key_string& rhs) const noexcept { - return collateCompare(lhs, *rhs, collateOpts_) == ComparationResult::Lt; + return collateCompare(lhs, rhs, collateOpts_) == ComparationResult::Lt; } bool operator()(const key_string& lhs, std::string_view rhs) const noexcept { - return collateCompare(*lhs, rhs, collateOpts_) == ComparationResult::Lt; + return collateCompare(lhs, rhs, collateOpts_) == ComparationResult::Lt; } 
CollateOpts collateOpts_; }; @@ -29,7 +29,7 @@ class key_string_with_hash : public key_string { public: key_string_with_hash() noexcept : key_string() {} key_string_with_hash(key_string s, CollateMode cm) - : key_string(std::move(s)), hash_(collateHash(**static_cast(this), cm)) {} + : key_string(std::move(s)), hash_(collateHash(*static_cast(this), cm)) {} key_string_with_hash(const key_string_with_hash& o) noexcept : key_string(o), hash_(o.hash_) {} key_string_with_hash(key_string_with_hash&& o) noexcept : key_string(std::move(o)), hash_(o.hash_) {} key_string_with_hash& operator=(key_string_with_hash&& o) noexcept { @@ -47,13 +47,13 @@ struct equal_key_string { equal_key_string(const CollateOpts& collateOpts = CollateOpts()) : collateOpts_(collateOpts) {} bool operator()(const key_string& lhs, const key_string& rhs) const noexcept { - return collateCompare(*lhs, *rhs, collateOpts_) == ComparationResult::Eq; + return collateCompare(lhs, rhs, collateOpts_) == ComparationResult::Eq; } bool operator()(std::string_view lhs, const key_string& rhs) const noexcept { - return collateCompare(lhs, *rhs, collateOpts_) == ComparationResult::Eq; + return collateCompare(lhs, rhs, collateOpts_) == ComparationResult::Eq; } bool operator()(const key_string& lhs, std::string_view rhs) const noexcept { - return collateCompare(*lhs, rhs, collateOpts_) == ComparationResult::Eq; + return collateCompare(lhs, rhs, collateOpts_) == ComparationResult::Eq; } private: @@ -64,7 +64,7 @@ struct hash_key_string { using is_transparent = void; hash_key_string(CollateMode collateMode = CollateNone) noexcept : collateMode_(collateMode) {} - size_t operator()(const key_string& s) const noexcept { return collateHash(*s, collateMode_); } + size_t operator()(const key_string& s) const noexcept { return collateHash(s, collateMode_); } size_t operator()(std::string_view s) const noexcept { return collateHash(s, collateMode_); } size_t operator()(const key_string_with_hash& s) const noexcept { return 
s.GetHash(); } diff --git a/cpp_src/core/indexopts.cc b/cpp_src/core/indexopts.cc index b103a6b8c..68a32c8dd 100644 --- a/cpp_src/core/indexopts.cc +++ b/cpp_src/core/indexopts.cc @@ -23,10 +23,26 @@ IndexOpts::IndexOpts(const std::string& sortOrderUTF8, uint8_t flags, RTreeIndex : options(flags), collateOpts_(sortOrderUTF8), rtreeType_(rtreeType) {} bool IndexOpts::IsEqual(const IndexOpts& other, IndexComparison cmpType) const noexcept { - return options == other.options && (cmpType == IndexComparison::SkipConfig || config == other.config) && - collateOpts_.mode == other.collateOpts_.mode && - collateOpts_.sortOrderTable.GetSortOrderCharacters() == other.collateOpts_.sortOrderTable.GetSortOrderCharacters() && - rtreeType_ == other.rtreeType_; + auto thisCopy = *this; + thisCopy.Dense(other.IsDense()); + + // Compare without config and 'IsDense' option + const bool baseEqual = + thisCopy.options == other.options && collateOpts_.mode == other.collateOpts_.mode && + collateOpts_.sortOrderTable.GetSortOrderCharacters() == other.collateOpts_.sortOrderTable.GetSortOrderCharacters() && + rtreeType_ == other.rtreeType_; + if (!baseEqual) { + return false; + } + switch (cmpType) { + case IndexComparison::BasicCompatibilityOnly: + return true; + case IndexComparison::SkipConfig: + return IsDense() == other.IsDense(); + case IndexComparison::Full: + default: + return IsDense() == other.IsDense() && config == other.config; + } } IndexOpts& IndexOpts::PK(bool value) & noexcept { @@ -59,6 +75,11 @@ IndexOpts& IndexOpts::SetCollateMode(CollateMode mode) & noexcept { return *this; } +IndexOpts& IndexOpts::SetCollateSortOrder(reindexer::SortingPrioritiesTable&& sortOrder) & noexcept { + collateOpts_.sortOrderTable = std::move(sortOrder); + return *this; +} + template void IndexOpts::Dump(T& os) const { os << '{'; diff --git a/cpp_src/core/indexopts.h b/cpp_src/core/indexopts.h index 90446e8bb..dad7273cd 100644 --- a/cpp_src/core/indexopts.h +++ b/cpp_src/core/indexopts.h @@ -13,7 
+13,7 @@ struct CollateOpts { void Dump(T& os) const; }; -enum class IndexComparison { WithConfig, SkipConfig }; +enum class IndexComparison { Full, SkipConfig, BasicCompatibilityOnly }; /// Cpp version of IndexOpts: includes /// sort order table which is not possible @@ -43,6 +43,10 @@ struct IndexOpts { [[nodiscard]] IndexOpts&& RTreeType(RTreeIndexType type) && noexcept { return std::move(RTreeType(type)); } IndexOpts& SetCollateMode(CollateMode mode) & noexcept; [[nodiscard]] IndexOpts&& SetCollateMode(CollateMode mode) && noexcept { return std::move(SetCollateMode(mode)); } + IndexOpts& SetCollateSortOrder(reindexer::SortingPrioritiesTable&& sortOrder) & noexcept; + [[nodiscard]] IndexOpts&& SetCollateSortOrder(reindexer::SortingPrioritiesTable&& sortOrder) && noexcept { + return std::move(SetCollateSortOrder(std::move(sortOrder))); + } template >* = nullptr> IndexOpts& SetConfig(Str&& conf) & { config = std::forward(conf); @@ -52,7 +56,8 @@ struct IndexOpts { [[nodiscard]] IndexOpts&& SetConfig(Str&& config) && { return std::move(SetConfig(std::forward(config))); } - CollateMode GetCollateMode() const noexcept { return static_cast(collateOpts_.mode); } + CollateMode GetCollateMode() const noexcept { return collateOpts_.mode; } + reindexer::SortingPrioritiesTable GetCollateSortOrder() const noexcept { return collateOpts_.sortOrderTable; } bool IsEqual(const IndexOpts& other, IndexComparison cmpType) const noexcept; diff --git a/cpp_src/core/item.cc b/cpp_src/core/item.cc index a53e98659..c998a5fc7 100644 --- a/cpp_src/core/item.cc +++ b/cpp_src/core/item.cc @@ -111,15 +111,17 @@ Item::FieldRef& Item::FieldRef::operator=(span arr) { } } else { if (!itemImpl_->holder_) { - itemImpl_->holder_ = std::make_unique>(); + itemImpl_->holder_ = std::make_unique(); } for (auto& elem : arr) { if constexpr (std::is_same_v) { - itemImpl_->holder_->push_back(elem.toString()); + itemImpl_->holder_->emplace_back(elem.getKeyString()); + } else if constexpr (std::is_same_v) { + 
itemImpl_->holder_->emplace_back(elem); } else { - itemImpl_->holder_->push_back(elem); + itemImpl_->holder_->emplace_back(make_key_string(elem)); } - pl.Set(field_, pos++, Variant(p_string{&itemImpl_->holder_->back()}, Variant::no_hold_t{})); + pl.Set(field_, pos++, Variant(p_string{itemImpl_->holder_->back()}, Variant::no_hold_t{})); } } } else { diff --git a/cpp_src/core/itemimpl.cc b/cpp_src/core/itemimpl.cc index d7f1b1387..10713ff35 100644 --- a/cpp_src/core/itemimpl.cc +++ b/cpp_src/core/itemimpl.cc @@ -21,11 +21,11 @@ void ItemImpl::SetField(int field, const VariantArray& krs) { VariantArray krsCopy; krsCopy.reserve(krs.size()); if (!holder_) { - holder_ = std::make_unique>(); + holder_ = std::make_unique(); } for (auto& kr : krs) { - holder_->push_back(kr.As()); - krsCopy.emplace_back(p_string{&holder_->back()}); + auto& back = holder_->emplace_back(kr.As()); + krsCopy.emplace_back(p_string{back}); } GetPayload().Set(field, krsCopy, false); } else { @@ -148,6 +148,25 @@ Error ItemImpl::GetProtobuf(WrSerializer& wrser) { return Error(); } +void ItemImpl::Clear() { + static const TagsMatcher kEmptyTagsMaptcher; + tagsMatcher_ = kEmptyTagsMaptcher; + precepts_.clear(); + cjson_ = std::string_view(); + holder_.reset(); + sourceData_.reset(); + largeJSONStrings_.clear(); + tupleData_.reset(); + ser_ = WrSerializer(); + + GetPayload().Reset(); + payloadValue_.SetLSN(lsn_t()); + + unsafe_ = false; + ns_.reset(); + realValue_.Free(); +} + // Construct item from compressed json void ItemImpl::FromCJSON(std::string_view slice, bool pkOnly, Recoder* recoder) { payloadValue_.Clone(); @@ -173,7 +192,7 @@ void ItemImpl::FromCJSON(std::string_view slice, bool pkOnly, Recoder* recoder) Payload pl = GetPayload(); pl.Reset(); if (!holder_) { - holder_ = std::make_unique>(); + holder_ = std::make_unique(); } CJsonDecoder decoder(tagsMatcher_, *holder_); diff --git a/cpp_src/core/itemimpl.h b/cpp_src/core/itemimpl.h index 9facf583a..f4232685b 100644 --- 
a/cpp_src/core/itemimpl.h +++ b/cpp_src/core/itemimpl.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include "core/cjson/msgpackdecoder.h" @@ -98,24 +97,7 @@ class ItemImpl : public ItemImplRawData { const std::vector& GetPrecepts() const noexcept { return precepts_; } void Unsafe(bool enable) noexcept { unsafe_ = enable; } bool IsUnsafe() const noexcept { return unsafe_; } - void Clear() { - tagsMatcher_ = TagsMatcher(); - precepts_.clear(); - cjson_ = std::string_view(); - holder_.reset(); - keyStringsHolder_.reset(); - sourceData_.reset(); - largeJSONStrings_.clear(); - tupleData_.reset(); - ser_ = WrSerializer(); - - GetPayload().Reset(); - payloadValue_.SetLSN(lsn_t()); - - unsafe_ = false; - ns_.reset(); - realValue_.Free(); - } + void Clear(); void SetNamespace(std::shared_ptr ns) noexcept { ns_ = std::move(ns); } std::shared_ptr GetNamespace() const noexcept { return ns_; } static void validateModifyArray(const VariantArray& values); diff --git a/cpp_src/core/itemimplrawdata.h b/cpp_src/core/itemimplrawdata.h index f8581241b..2a8a0d9bd 100644 --- a/cpp_src/core/itemimplrawdata.h +++ b/cpp_src/core/itemimplrawdata.h @@ -1,14 +1,16 @@ #pragma once #include "core/keyvalue/key_string.h" +#include "estl/h_vector.h" #include "payload/payloadvalue.h" -#include #include namespace reindexer { struct ItemImplRawData { + using HolderT = h_vector; + ItemImplRawData() = default; explicit ItemImplRawData(PayloadValue v) : payloadValue_(std::move(v)) {} ItemImplRawData(const ItemImplRawData&) = delete; @@ -21,8 +23,7 @@ struct ItemImplRawData { std::unique_ptr sourceData_; std::vector> largeJSONStrings_; std::vector precepts_; - std::unique_ptr> holder_; - std::unique_ptr> keyStringsHolder_; + std::unique_ptr holder_; }; } // namespace reindexer diff --git a/cpp_src/core/joincache.h b/cpp_src/core/joincache.h index 63314f8d0..446a3e3d3 100644 --- a/cpp_src/core/joincache.h +++ b/cpp_src/core/joincache.h @@ -48,14 +48,16 @@ struct JoinPreResult; struct JoinCacheVal { 
JoinCacheVal() = default; - size_t Size() const noexcept { return ids_ ? (sizeof(*ids_.get()) + ids_->heap_size()) : 0; } - IdSet::Ptr ids_; + size_t Size() const noexcept { return ids ? (sizeof(*ids.get()) + ids->heap_size()) : 0; } + bool IsInitialized() const noexcept { return inited; } + + IdSet::Ptr ids; bool matchedAtLeastOnce = false; bool inited = false; std::shared_ptr preResult; }; -using JoinCache = LRUCache; +using JoinCache = LRUCache, LRUWithAtomicPtr::No>; struct JoinCacheRes { bool haveData = false; diff --git a/cpp_src/core/key_value_type.h b/cpp_src/core/key_value_type.h index 6c8dfc056..fbe9a7c43 100644 --- a/cpp_src/core/key_value_type.h +++ b/cpp_src/core/key_value_type.h @@ -282,6 +282,7 @@ class KeyValueType { class key_string; class Uuid; +struct p_string; template <> RX_ALWAYS_INLINE KeyValueType KeyValueType::From() { @@ -313,4 +314,14 @@ RX_ALWAYS_INLINE KeyValueType KeyValueType::From() { return KeyValueType::Uuid{}; } +template <> +RX_ALWAYS_INLINE KeyValueType KeyValueType::From() { + return KeyValueType::String{}; +} + +template <> +RX_ALWAYS_INLINE KeyValueType KeyValueType::From() { + return KeyValueType::String{}; +} + } // namespace reindexer diff --git a/cpp_src/core/keyvalue/key_string.cc b/cpp_src/core/keyvalue/key_string.cc new file mode 100644 index 000000000..10c6cc4ea --- /dev/null +++ b/cpp_src/core/keyvalue/key_string.cc @@ -0,0 +1,10 @@ +#include "key_string.h" +#include "tools/errors.h" + +namespace reindexer { + +void key_string::throwMaxLenOverflow(size_t len) { + throw Error(errParams, "Key_string length overflow: %d > max key_string length (%d)", len, kMaxLen); +} + +} // namespace reindexer diff --git a/cpp_src/core/keyvalue/key_string.h b/cpp_src/core/keyvalue/key_string.h index 94b678fec..e0205b614 100644 --- a/cpp_src/core/keyvalue/key_string.h +++ b/cpp_src/core/keyvalue/key_string.h @@ -1,114 +1,142 @@ #pragma once +#include #include -#include +#include +#include +#include #include +#include "estl/defines.h" 
#include "estl/fast_hash_traits.h" -#include "estl/intrusive_ptr.h" namespace reindexer { -typedef const std::string const_string; - -class base_key_string : public std::string { +class key_string_impl { public: - base_key_string(std::string_view str) : std::string(str.data(), str.length()) { - export_hdr_.refcounter.store(0, std::memory_order_release); - bind(); - } - template - base_key_string(Args&&... args) : std::string(std::forward(args)...) { - export_hdr_.refcounter.store(0, std::memory_order_release); - bind(); - } + using size_type = int32_t; - template - void assign(Args&&... args) { - const_string::assign(std::forward(args)...); - bind(); - } - static ptrdiff_t export_hdr_offset() noexcept { - static base_key_string sample; - return ptrdiff_t(reinterpret_cast(&sample.export_hdr_) - reinterpret_cast(&sample)); - } - size_t heap_size() noexcept { - // Check for SSO (small string optimization) - uintptr_t pstart = uintptr_t(this); - uintptr_t pend = pstart + sizeof(std::string); - uintptr_t pdata = uintptr_t(data()); - return (pdata >= pstart && pdata < pend) ? 0 : (capacity() + 1); // +1 for terminating \0 - } + key_string_impl(const key_string_impl&) = delete; + key_string_impl(key_string_impl&&) = delete; + key_string_impl& operator=(const key_string_impl&) = delete; + key_string_impl& operator=(key_string_impl&&) = delete; + + static ptrdiff_t export_hdr_offset() noexcept { return 0; } + const char* data() const noexcept { return data_; } + size_t size() const noexcept { return export_hdr_.len; } + operator std::string_view() const noexcept { return std::string_view(data_, export_hdr_.len); } - // delete all modification methods - to be sure, that base_key_string is mutable, and export will not invalidate after construction - iterator begin() = delete; - iterator end() = delete; - char& operator[](int) = delete; - template - void insert(Args&&... args) = delete; - template - void append(Args&&... args) = delete; - template - void copy(Args&&... 
args) = delete; - template - void replace(Args&&... args) = delete; - void push_back(char c) = delete; - template - void erase(Args&&... args) = delete; - template - void reserve(Args&&... args) = delete; - template - void resize(Args&&... args) = delete; - void at(int) = delete; - void shrink_to_fit() = delete; - void clear() = delete; - -protected: - friend void intrusive_ptr_add_ref(base_key_string* x) noexcept { + // Unsafe ref counter methods for direct payload access + static void addref_unsafe(const key_string_impl* x) noexcept { if (x) { x->export_hdr_.refcounter.fetch_add(1, std::memory_order_relaxed); } } - friend void intrusive_ptr_release(base_key_string* x) noexcept { - if (x && x->export_hdr_.refcounter.fetch_sub(1, std::memory_order_acq_rel) == 1) { - delete x; // NOLINT(*.NewDelete) False positive + static void release_unsafe(const key_string_impl* x) noexcept { + if ((x && x->export_hdr_.refcounter.fetch_sub(1, std::memory_order_acq_rel) == 1)) { + x->~key_string_impl(); + operator delete(const_cast(x)); } } - friend bool intrusive_ptr_is_unique(base_key_string* x) noexcept { - // std::memory_order_acquire - is essential for COW constructions based on intrusive_ptr - return !x || (x->export_hdr_.refcounter.load(std::memory_order_acquire) == 1); - } - void bind() noexcept { - export_hdr_.cstr = std::string::c_str(); - export_hdr_.len = length(); +private: + friend class key_string; + // Only key_string should be able to construct key_string_impl + explicit key_string_impl(std::string_view str) noexcept { + std::memcpy(data_, str.data(), str.size()); + export_hdr_.cstr = data_; + export_hdr_.len = str.size(); + export_hdr_.refcounter.store(0, std::memory_order_relaxed); } struct export_hdr { const void* cstr; - int32_t len; - std::atomic refcounter; + size_type len; + mutable std::atomic refcounter; } export_hdr_; + char data_[]; }; static_assert(sizeof(std::atomic) == sizeof(int8_t[4]), "refcounter in cbinding (struct reindexer_string) is reserved 
via int8_t array. Sizes must be same"); -class key_string : public intrusive_ptr { +class key_string { public: - using intrusive_ptr::intrusive_ptr; + using const_iterator = const char*; + using iterator = const_iterator; + + key_string() noexcept : impl_(nullptr) {} + explicit key_string(std::nullptr_t) noexcept : key_string() {} + key_string(const key_string_impl* str) noexcept : impl_(str) { key_string_impl::addref_unsafe(impl_); } + key_string(const key_string_impl* str, bool add_ref) noexcept : impl_(str) { + if (add_ref) { + key_string_impl::addref_unsafe(impl_); + } + } + explicit key_string(std::string_view str) { + if rx_unlikely (str.size() > kMaxLen) { + throwMaxLenOverflow(str.size()); + } + void* impl = operator new(sizeof(key_string_impl) + str.size()); + impl_ = new (impl) key_string_impl(str); + key_string_impl::addref_unsafe(impl_); + } + key_string(const key_string& rhs) noexcept : impl_(rhs.impl_) { key_string_impl::addref_unsafe(impl_); } + key_string(key_string&& rhs) noexcept : impl_(rhs.impl_) { rhs.impl_ = nullptr; } + ~key_string() { key_string_impl::release_unsafe(impl_); } + + key_string& operator=(key_string&& rhs) noexcept { + swap(rhs); + return *this; + } + // NOLINTNEXTLINE(bugprone-unhandled-self-assignment) + key_string& operator=(const key_string& rhs) noexcept { + key_string copy(rhs); + swap(copy); + return *this; + } + + const key_string_impl* get() const noexcept { return impl_; } + size_t size() const noexcept { return impl_ ? impl_->size() : 0; } + const char* data() const& noexcept { return impl_ ? impl_->data() : nullptr; } + const char* data() && = delete; + + explicit operator bool() const noexcept { return impl_; } + operator std::string_view() const noexcept { return impl_ ? std::string_view(*impl_) : std::string_view(); } + void swap(key_string& rhs) noexcept { std::swap(impl_, rhs.impl_); } + size_t heap_size() const noexcept { return impl_ ? 
(sizeof(key_string_impl) + impl_->size()) : 0; } + + iterator begin() const& noexcept { return impl_ ? impl_->data() : nullptr; } + iterator end() const& noexcept { return impl_ ? (impl_->data() + impl_->size()) : nullptr; } + const_iterator cbegin() const& noexcept { return begin(); } + const_iterator cend() const& noexcept { return end(); } + iterator begin() const&& = delete; + iterator end() && = delete; + const_iterator cbegin() && = delete; + const_iterator cend() && = delete; + +private: + constexpr static size_t kMaxLen = std::numeric_limits::max(); + + [[noreturn]] void throwMaxLenOverflow(size_t len); + + const key_string_impl* impl_; }; template key_string make_key_string(Args&&... args) { - return key_string(new base_key_string(std::forward(args)...)); + return key_string(std::string_view(std::forward(args)...)); } -inline static bool operator==(const key_string& rhs, const key_string& lhs) noexcept { return *rhs == *lhs; } +template +T& operator<<(T& os, const key_string& k) { + return os << std::string_view(k); +} -// Unchecked cast to derived class! 
-// It assumes, that all strings in payload are intrusive_ptr -inline void key_string_add_ref(std::string* str) noexcept { intrusive_ptr_add_ref(reinterpret_cast(str)); } -inline void key_string_release(std::string* str) noexcept { intrusive_ptr_release(reinterpret_cast(str)); } +inline static bool operator==(const key_string& lhs, const key_string& rhs) noexcept { + return std::string_view(rhs) == std::string_view(lhs); +} +inline static bool operator==(const key_string& lhs, std::string_view rhs) noexcept { return std::string_view(lhs) == rhs; } +inline static bool operator==(std::string_view lhs, const key_string& rhs) noexcept { return std::string_view(rhs) == lhs; } template <> struct is_recommends_sc_hash_map { @@ -116,17 +144,12 @@ struct is_recommends_sc_hash_map { }; } // namespace reindexer -namespace std { -template <> -struct hash { -public: - size_t operator()(const reindexer::base_key_string& obj) const { return hash()(obj); } -}; +namespace std { template <> struct hash { public: - size_t operator()(const reindexer::key_string& obj) const { return hash()(*obj); } + size_t operator()(const reindexer::key_string& obj) const noexcept { return hash()(obj); } }; } // namespace std diff --git a/cpp_src/core/keyvalue/p_string.h b/cpp_src/core/keyvalue/p_string.h index cb99f375f..5fee754bb 100644 --- a/cpp_src/core/keyvalue/p_string.h +++ b/cpp_src/core/keyvalue/p_string.h @@ -39,7 +39,7 @@ struct p_string { constexpr static uint64_t tagVstr = 0x3ULL; // ptr points to slice object constexpr static uint64_t tagSlice = 0x4ULL; - // ptr points to key_string payload atomic_rc_wrapper + // ptr points to key_string payload atomic_rc_wrapper constexpr static uint64_t tagKeyString = 0x5ULL; // ptr points to json_string constexpr static uint64_t tagJsonStr = 0x6ULL; @@ -70,8 +70,9 @@ struct p_string { return std::string_view(str.ptr, str.size); } case tagCxxstr: - case tagKeyString: return std::string_view(*reinterpret_cast(ptr())); + case tagKeyString: + return 
std::string_view(*reinterpret_cast(ptr())); case tagSlice: return *reinterpret_cast(ptr()); case tagLstr: { @@ -95,8 +96,9 @@ struct p_string { case tagCstr: return reinterpret_cast(ptr()); case tagCxxstr: - case tagKeyString: return (reinterpret_cast(ptr()))->data(); + case tagKeyString: + return (reinterpret_cast(ptr()))->data(); case tagMsgPackStr: return (reinterpret_cast(ptr()))->ptr; case tagSlice: @@ -123,8 +125,9 @@ struct p_string { case tagCstr: return strlen(reinterpret_cast(ptr())); case tagCxxstr: - case tagKeyString: return (reinterpret_cast(ptr()))->length(); + case tagKeyString: + return (reinterpret_cast(ptr()))->size(); case tagSlice: return (reinterpret_cast(ptr()))->size(); case tagLstr: @@ -157,14 +160,21 @@ struct p_string { bool operator>=(p_string other) const noexcept { return compare(other) >= 0; } bool operator<=(p_string other) const noexcept { return compare(other) <= 0; } const std::string* getCxxstr() const noexcept { - assertrx(type() == tagCxxstr || type() == tagKeyString); + assertrx(type() == tagCxxstr); return reinterpret_cast(ptr()); } key_string getKeyString() const noexcept { + if (type() == tagKeyString) { + auto str = reinterpret_cast(const_cast(ptr())); + return key_string(str); + } else { + return make_key_string(data(), size()); + } + } + const key_string_impl* getBaseKeyString() const noexcept { assertrx(type() == tagKeyString); - auto str = reinterpret_cast(const_cast(ptr())); - return key_string(str); + return reinterpret_cast(const_cast(ptr())); } int type() const noexcept { return (v & tagMask) >> tagShift; } diff --git a/cpp_src/core/keyvalue/uuid.cc b/cpp_src/core/keyvalue/uuid.cc index 5a549252b..085620ab7 100644 --- a/cpp_src/core/keyvalue/uuid.cc +++ b/cpp_src/core/keyvalue/uuid.cc @@ -203,6 +203,12 @@ Uuid::operator std::string() const { return res; } +Uuid::operator key_string() const { + char res[kStrFormLen]; + PutToStr({res, kStrFormLen}); + return make_key_string(std::string_view(res, kStrFormLen)); +} + 
void Uuid::PutToStr(span str) const noexcept { assertrx(str.size() >= kStrFormLen); static constexpr char hexChars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; diff --git a/cpp_src/core/keyvalue/uuid.h b/cpp_src/core/keyvalue/uuid.h index 731450ad8..11524253b 100644 --- a/cpp_src/core/keyvalue/uuid.h +++ b/cpp_src/core/keyvalue/uuid.h @@ -60,6 +60,7 @@ class Uuid { template explicit Uuid(Ts...) = delete; [[nodiscard]] explicit operator std::string() const; + [[nodiscard]] explicit operator key_string() const; [[nodiscard]] ComparationResult Compare(const Uuid& other) const noexcept { if (data_[0] == other.data_[0]) { return data_[1] == other.data_[1] ? ComparationResult::Eq diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index 0ceb031c0..b5c781a29 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -158,13 +158,7 @@ std::string Variant::As() const { [&](KeyValueType::Bool) { return variant_.value_bool ? 
"true"s : "false"s; }, [&](KeyValueType::Int64) { return std::to_string(variant_.value_int64); }, [&](KeyValueType::Double) { return double_to_str(variant_.value_double); }, - [&](KeyValueType::String) { - const auto pstr = this->operator p_string(); - if (pstr.type() == p_string::tagCxxstr || pstr.type() == p_string::tagKeyString) { - return *(pstr.getCxxstr()); - } - return pstr.toString(); - }, + [&](KeyValueType::String) { return this->operator p_string().toString(); }, [&](KeyValueType::Null) { return "null"s; }, [this](OneOf) -> std::string { throw Error(errParams, "Can't convert '%s'-value to string", variant_.type.Name()); @@ -179,6 +173,38 @@ std::string Variant::As() const { } } +template <> +key_string Variant::As() const { + using namespace std::string_literals; + if (isUuid()) { + return key_string{Uuid{*this}}; + } else { + return variant_.type.EvaluateOneOf([&](KeyValueType::Int) { return make_key_string(std::to_string(variant_.value_int)); }, + [&](KeyValueType::Bool) { + static const key_string kTrueKeyString = make_key_string("true"); + static const key_string kFalseKeyString = make_key_string("false"); + return variant_.value_bool ? 
kTrueKeyString : kFalseKeyString; + }, + [&](KeyValueType::Int64) { return make_key_string(std::to_string(variant_.value_int64)); }, + [&](KeyValueType::Double) { return make_key_string(double_to_str(variant_.value_double)); }, + [&](KeyValueType::String) { return this->operator p_string().getKeyString(); }, + [&](KeyValueType::Null) { + static const key_string kNullKeyString = make_key_string("null"); + return kNullKeyString; + }, + [this](OneOf) -> key_string { + throw Error(errParams, "Can't convert '%s'-value to string", variant_.type.Name()); + }, + [&](KeyValueType::Tuple) { + auto va = getCompositeValues(); + WrSerializer wrser; + va.Dump(wrser); + return make_key_string(wrser.Slice()); + }, + [&](KeyValueType::Uuid) { return key_string{Uuid{*this}}; }); + } +} + template <> p_string Variant::As() const { assertrx_throw(!isUuid() && variant_.type.Is()); @@ -992,15 +1018,16 @@ void Variant::convertToComposite(const PayloadType& payloadType, const FieldsSet } // Alloc usual payloadvalue + extra memory for hold string - auto& pv = *new (cast()) PayloadValue(payloadType.TotalSize() + val->size()); + auto strSz = val.size(); + auto& pv = *new (cast()) PayloadValue(payloadType.TotalSize() + strSz); variant_.hold = 1; variant_.type = KeyValueType::Composite{}; // Copy serializer buffer with strings to extra payloadvalue memory char* data = reinterpret_cast(pv.Ptr() + payloadType.TotalSize()); - memcpy(data, val->data(), val->size()); + memcpy(data, val.data(), strSz); - Serializer ser(std::string_view(data, val->size())); + Serializer ser(std::string_view(data, strSz)); size_t count = ser.GetVarUint(); if (count != fields.size()) { @@ -1026,7 +1053,7 @@ VariantArray Variant::getCompositeValues() const { assertrx(variant_.type.Is()); VariantArray res; - Serializer ser(**cast()); + Serializer ser(*cast()); size_t count = ser.GetVarUint(); res.reserve(count); while (count--) { @@ -1040,11 +1067,8 @@ Variant::operator key_string() const { assertKeyType(variant_.type); 
if (variant_.hold == 1) { return *cast(); - } else if (cast()->type() == p_string::tagKeyString) { - return cast()->getKeyString(); - } else { - return make_key_string(cast()->data(), cast()->size()); } + return cast()->getKeyString(); } Variant::operator p_string() const noexcept { @@ -1056,7 +1080,7 @@ Variant::operator p_string() const noexcept { Variant::operator std::string_view() const noexcept { assertrx(!isUuid()); assertKeyType(variant_.type); - return (variant_.hold == 1) ? std::string_view(**cast()) : *cast(); + return (variant_.hold == 1) ? std::string_view(*cast()) : *cast(); } Variant::operator const PayloadValue&() const noexcept { assertrx(!isUuid()); diff --git a/cpp_src/core/keyvalue/variant.h b/cpp_src/core/keyvalue/variant.h index d7c119943..09d5f0058 100644 --- a/cpp_src/core/keyvalue/variant.h +++ b/cpp_src/core/keyvalue/variant.h @@ -40,9 +40,6 @@ class Variant { Variant(p_string v, hold_t); explicit Variant(p_string v) noexcept : Variant(v, no_hold_t{}) {} explicit Variant(const std::string& v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(v)); } - explicit Variant(std::string&& v) : variant_{0, 1, KeyValueType::String{}} { - new (cast()) key_string(make_key_string(std::move(v))); - } explicit Variant(std::string_view v) : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(make_key_string(v)); } explicit Variant(const key_string& v) noexcept : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(v); } explicit Variant(key_string&& v) noexcept : variant_{0, 1, KeyValueType::String{}} { new (cast()) key_string(std::move(v)); } @@ -266,6 +263,8 @@ template <> bool Variant::As() const; template <> std::string Variant::As() const; +template <> +key_string Variant::As() const; class VariantArray : public h_vector { using Base = h_vector; diff --git a/cpp_src/core/lrucache.cc b/cpp_src/core/lrucache.cc index 9170122f7..922f0105c 100644 --- a/cpp_src/core/lrucache.cc +++ 
b/cpp_src/core/lrucache.cc @@ -9,8 +9,12 @@ namespace reindexer { constexpr uint32_t kMaxHitCountToCache = 1024; -template -typename LRUCache::Iterator LRUCache::Get(const K& key) { +template +LRUCacheImpl::LRUCacheImpl(size_t sizeLimit, uint32_t hitCount) noexcept + : totalCacheSize_(0), cacheSizeLimit_(sizeLimit), hitCountToCache_(hitCount) {} + +template +typename LRUCacheImpl::Iterator LRUCacheImpl::Get(const K& key) { if rx_unlikely (cacheSizeLimit_ == 0) { return Iterator(); } @@ -36,8 +40,8 @@ typename LRUCache::Iterator LRUCache::Get( return Iterator(true, it->second.val); } -template -void LRUCache::Put(const K& key, V&& v) { +template +void LRUCacheImpl::Put(const K& key, V&& v) { if rx_unlikely (cacheSizeLimit_ == 0) { return; } @@ -65,8 +69,8 @@ void LRUCache::Put(const K& key, V&& v) { } } -template -RX_ALWAYS_INLINE bool LRUCache::eraseLRU() { +template +RX_ALWAYS_INLINE bool LRUCacheImpl::eraseLRU() { typename LRUList::iterator it = lru_.begin(); while (totalCacheSize_ > cacheSizeLimit_) { @@ -98,11 +102,11 @@ RX_ALWAYS_INLINE bool LRUCache::eraseLRU() { return !lru_.empty(); } -template -bool LRUCache::clearAll() { +template +bool LRUCacheImpl::clearAll() { const bool res = !items_.empty(); totalCacheSize_ = 0; - std::unordered_map().swap(items_); + std::unordered_map().swap(items_); LRUList().swap(lru_); getCount_ = 0; putCount_ = 0; @@ -110,8 +114,8 @@ bool LRUCache::clearAll() { return res; } -template -LRUCacheMemStat LRUCache::GetMemStat() { +template +LRUCacheMemStat LRUCacheImpl::GetMemStat() const { LRUCacheMemStat ret; std::lock_guard lk(lock_); @@ -125,9 +129,38 @@ LRUCacheMemStat LRUCache::GetMemStat() { return ret; } -template class LRUCache; -template class LRUCache; -template class LRUCache; -template class LRUCache; + +template +void LRUCacheImpl::Clear() { + std::lock_guard lk(lock_); + clearAll(); +} + +template +void LRUCacheImpl::Clear(std::function cond) { + std::lock_guard lock(lock_); + for (auto it = lru_.begin(); it != 
lru_.end();) { + if (!cond(**it)) { + ++it; + continue; + } + auto mIt = items_.find(**it); + assertrx(mIt != items_.end()); + const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); + if rx_unlikely (oldSize > totalCacheSize_) { + clearAll(); + return; + } + totalCacheSize_ -= oldSize; + items_.erase(mIt); + it = lru_.erase(it); + ++eraseCount_; + } +} + +template class LRUCacheImpl; +template class LRUCacheImpl; +template class LRUCacheImpl; +template class LRUCacheImpl; } // namespace reindexer diff --git a/cpp_src/core/lrucache.h b/cpp_src/core/lrucache.h index 3acfb7dca..03f946284 100644 --- a/cpp_src/core/lrucache.h +++ b/cpp_src/core/lrucache.h @@ -1,21 +1,22 @@ #pragma once -#include #include #include #include #include "dbconfig.h" +#include "estl/atomic_unique_ptr.h" #include "namespace/namespacestat.h" namespace reindexer { constexpr size_t kElemSizeOverhead = 256; -template -class LRUCache { +template +class LRUCacheImpl { public: using Key = K; - LRUCache(size_t sizeLimit, uint32_t hitCount) noexcept : totalCacheSize_(0), cacheSizeLimit_(sizeLimit), hitCountToCache_(hitCount) {} + using Value = V; + LRUCacheImpl(size_t sizeLimit, uint32_t hitCount) noexcept; struct Iterator { Iterator(bool k = false, const V& v = V()) : valid(k), val(v) {} Iterator(const Iterator& other) = delete; @@ -32,17 +33,13 @@ class LRUCache { bool valid; V val; }; - // Get cached val. Create new entry in cache if unexists + // Get cached val. 
Create new entry in cache if does not exist Iterator Get(const K& k); // Put cached val void Put(const K& k, V&& v); - - LRUCacheMemStat GetMemStat(); - - bool Clear() { - std::lock_guard lk(lock_); - return clearAll(); - } + LRUCacheMemStat GetMemStat() const; + void Clear(); + void Clear(std::function cond); template void Dump(T& os, std::string_view step, std::string_view offset) const { @@ -78,29 +75,7 @@ class LRUCache { os << "]\n" << offset << '}'; } - template - void Clear(const F& cond) { - std::lock_guard lock(lock_); - for (auto it = lru_.begin(); it != lru_.end();) { - if (!cond(**it)) { - ++it; - continue; - } - auto mIt = items_.find(**it); - assertrx(mIt != items_.end()); - const size_t oldSize = sizeof(Entry) + kElemSizeOverhead + mIt->first.Size() + mIt->second.val.Size(); - if rx_unlikely (oldSize > totalCacheSize_) { - clearAll(); - return; - } - totalCacheSize_ -= oldSize; - items_.erase(mIt); - it = lru_.erase(it); - ++eraseCount_; - } - } - -protected: +private: typedef std::list LRUList; struct Entry { V val; @@ -115,7 +90,7 @@ class LRUCache { bool eraseLRU(); bool clearAll(); - std::unordered_map items_; + std::unordered_map items_; LRUList lru_; mutable std::mutex lock_; size_t totalCacheSize_; @@ -125,4 +100,113 @@ class LRUCache { uint64_t getCount_ = 0, putCount_ = 0, eraseCount_ = 0; }; +enum class LRUWithAtomicPtr : bool { Yes, No }; + +template +class LRUCache { + using CachePtrT = std::conditional_t, std::unique_ptr>; + +public: + using Iterator = typename CacheT::Iterator; + + LRUCache() = default; + template + LRUCache(Args&&... args) noexcept : ptr_(makePtr(std::forward(args)...)) { + (void)alignment1_; + (void)alignment2_; +#if defined(__x86_64__) + static_assert(sizeof(LRUCache) == 128, "Unexpected size. 
Check alignment"); +#endif // defined(__x86_64__) + } + virtual ~LRUCache() = default; + + typename CacheT::Iterator Get(const typename CacheT::Key& k) const { + typename CacheT::Iterator it; + if (ptr_) { + it = ptr_->Get(k); + if (it.valid && it.val.IsInitialized()) { + stats_.hits.fetch_add(1, std::memory_order_relaxed); + } else { + stats_.misses.fetch_add(1, std::memory_order_relaxed); + } + } + return it; + } + void Put(const typename CacheT::Key& k, typename CacheT::Value&& v) const { + if (ptr_) { + ptr_->Put(k, std::move(v)); + } + } + LRUCacheMemStat GetMemStat() const { return ptr_ ? ptr_->GetMemStat() : LRUCacheMemStat(); } + LRUCachePerfStat GetPerfStat() const noexcept { + auto stats = stats_.GetPerfStat(); + stats.state = ptr_ ? LRUCachePerfStat::State::Active : LRUCachePerfStat::State::Inactive; + return stats; + } + void ResetPerfStat() noexcept { stats_.Reset(); } + void Clear() { + if (ptr_) { + ptr_->Clear(); + } + } + template + void Clear(const F& cond) { + if (ptr_) { + ptr_->Clear(cond); + } + } + + template + void Dump(T& os, std::string_view step, std::string_view offset) const { + if (ptr_) { + ptr_->Dump(os, step, offset); + } else { + os << ""; + } + } + void ResetImpl() noexcept { ptr_.reset(); } + template + void Reinitialize(Args&&... args) { + ptr_ = makePtr(std::forward(args)...); + } + bool IsActive() const noexcept { return ptr_.get(); } + void CopyInternalPerfStatsFrom(const LRUCache& o) noexcept { stats_ = o.stats_; } + +private: + template + CachePtrT makePtr(Args&&... 
args) { + return CachePtrT(new CacheT(std::forward(args)...)); + } + + class Stats { + public: + Stats(uint64_t _hits = 0, uint64_t _misses = 0) noexcept : hits{_hits}, misses{_misses} {} + Stats(const Stats& o) : hits(o.hits.load(std::memory_order_relaxed)), misses(o.misses.load(std::memory_order_relaxed)) {} + LRUCachePerfStat GetPerfStat() const noexcept { + return LRUCachePerfStat{.hits = hits.load(std::memory_order_relaxed), .misses = misses.load(std::memory_order_relaxed)}; + } + void Reset() noexcept { + hits.store(0, std::memory_order_relaxed); + misses.store(0, std::memory_order_relaxed); + } + Stats& operator=(const Stats& o) { + if (&o != this) { + hits.store(o.hits.load()); + misses.store(o.misses.load()); + } + return *this; + } + + std::atomic_uint64_t hits; + std::atomic_uint64_t misses; + }; + + // Cache line alignment to avoid contention betwee atomic cache ptr and cache stats (alignas would be better, but it does not work + // properly with tcmalloc on CentOS7) + uint8_t alignment1_[48]; + CachePtrT ptr_; + uint8_t alignment2_[48]; + mutable Stats stats_; +}; + } // namespace reindexer diff --git a/cpp_src/core/namespace/namespace.cc b/cpp_src/core/namespace/namespace.cc index d219ba229..e58b6ca78 100644 --- a/cpp_src/core/namespace/namespace.cc +++ b/cpp_src/core/namespace/namespace.cc @@ -127,6 +127,10 @@ void Namespace::ApplySnapshotChunk(const SnapshotChunk& ch, bool isInitialLeader SnapshotTxHandler handler(*this); handler.ApplyChunk(ch, isInitialLeaderSync, ctx); } + + if (ch.IsLastChunk() && (ch.IsShallow() || !ch.IsWAL())) { + ns_->RebuildFreeItemsStorage(ctx); + } } bool Namespace::needNamespaceCopy(const NamespaceImpl::Ptr& ns, const LocalTransaction& tx) const noexcept { diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index 073f7a563..5d4c796b1 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -8,6 +8,7 @@ #include 
"core/cjson/uuid_recoders.h" #include "core/formatters/lsn_fmt.h" #include "core/index/index.h" +#include "core/index/indexfastupdate.h" #include "core/index/ttlindex.h" #include "core/itemimpl.h" #include "core/itemmodifier.h" @@ -80,9 +81,8 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& schema_(src.schema_), enablePerfCounters_{src.enablePerfCounters_.load()}, config_{src.config_}, - queryCountCache_{ - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache)}, - joinCache_{std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache)}, + queryCountCache_{config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache}, + joinCache_{config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache}, wal_{src.wal_, storage_}, repl_{src.repl_}, storageOpts_{src.storageOpts_}, @@ -102,6 +102,8 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& for (auto& idxIt : src.indexes_) { indexes_.push_back(idxIt->Clone()); } + queryCountCache_.CopyInternalPerfStatsFrom(src.queryCountCache_); + joinCache_.CopyInternalPerfStatsFrom(src.joinCache_); markUpdated(IndexOptimization::Full); logPrintf(LogInfo, "Namespace::CopyContentsFrom (%s).Workers: %d, timeout: %d, tm: { state_token: 0x%08X, version: %d }", name_, @@ -123,9 +125,8 @@ NamespaceImpl::NamespaceImpl(const std::string& name, std::optional sta tagsMatcher_(payloadType_, stateToken.has_value() ? 
stateToken.value() : tools::RandomGenerator::gets32()), locker_(syncer, *this), enablePerfCounters_{false}, - queryCountCache_{ - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache)}, - joinCache_{std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache)}, + queryCountCache_{config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache}, + joinCache_{config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache}, wal_{getWalSize(config_)}, lastSelectTime_{0}, cancelCommitCnt_{0}, @@ -285,13 +286,12 @@ void NamespaceImpl::OnConfigUpdated(DBConfigProvider& configProvider, const RdxC config_.cacheConfig.ftIdxCacheSize / 1024, config_.cacheConfig.ftIdxHitsToCache); } if (needReconfigureJoinCache) { - joinCache_ = std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache); + joinCache_.Reinitialize(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache); logPrintf(LogTrace, "[%s] Join cache has been reconfigured: { max_size %lu KB; hits: %u }", name_, config_.cacheConfig.joinCacheSize / 1024, config_.cacheConfig.joinHitsToCache); } if (needReconfigureQueryCountCache) { - queryCountCache_ = - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache); + queryCountCache_.Reinitialize(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache); logPrintf(LogTrace, "[%s] Queries count cache has been reconfigured: { max_size %lu KB; hits: %u }", name_, config_.cacheConfig.queryCountCacheSize / 1024, config_.cacheConfig.queryCountHitsToCache); } @@ -728,8 +728,8 @@ void NamespaceImpl::dumpIndex(std::ostream& os, std::string_view index) const { } void NamespaceImpl::clearNamespaceCaches() { - queryCountCache_->Clear(); - joinCache_->Clear(); + queryCountCache_.Clear(); + joinCache_.Clear(); } void NamespaceImpl::dropIndex(const IndexDef& index, bool 
disableTmVersionInc) { @@ -909,9 +909,12 @@ void NamespaceImpl::verifyUpdateIndex(const IndexDef& indexDef) const { throw Error(errConflict, "Cannot add PK index '%s.%s'. Already exists another PK index - '%s'", name_, indexDef.name_, indexes_[currentPKIt->second]->Name()); } - if (indexDef.opts_.IsArray() != oldIndex->Opts().IsArray()) { - throw Error(errParams, "Cannot update index '%s' in namespace '%s'. Can't convert array index to not array and vice versa", - indexDef.name_, name_); + if (indexDef.opts_.IsArray() != oldIndex->Opts().IsArray() && !items_.empty()) { + // Array may be converted to scalar and scalar to array only if there are no items in namespace + throw Error( + errParams, + "Cannot update index '%s' in namespace '%s'. Can't convert array index to not array and vice versa in non-empty namespace", + indexDef.name_, name_); } if (indexDef.opts_.IsPK() && indexDef.opts_.IsArray()) { throw Error(errParams, "Cannot update index '%s' in namespace '%s'. PK field can't be array", indexDef.name_, name_); @@ -950,7 +953,7 @@ void NamespaceImpl::verifyUpdateIndex(const IndexDef& indexDef) const { FieldsSet changedFields{idxNameIt->second}; PayloadType newPlType = payloadType_; newPlType.Drop(indexDef.name_); - newPlType.Add(PayloadFieldType(newIndex->KeyType(), indexDef.name_, indexDef.jsonPaths_, indexDef.opts_.IsArray())); + newPlType.Add(PayloadFieldType(*newIndex, indexDef)); verifyConvertTypes(oldIndex->KeyType(), newIndex->KeyType(), newPlType, changedFields); } } @@ -1147,7 +1150,7 @@ void NamespaceImpl::addIndex(const IndexDef& indexDef, bool disableTmVersionInc, } else { PayloadType oldPlType = payloadType_; auto newIndex = Index::New(indexDef, PayloadType(), FieldsSet(), config_.cacheConfig); - payloadType_.Add(PayloadFieldType{newIndex->KeyType(), indexName, jsonPaths, newIndex->Opts().IsArray()}); + payloadType_.Add(PayloadFieldType(*newIndex, indexDef)); rollbacker.SetOldPayloadType(std::move(oldPlType)); 
tagsMatcher_.UpdatePayloadType(payloadType_, disableTmVersionInc ? NeedChangeTmVersion::No : NeedChangeTmVersion::Increment); rollbacker.NeedResetPayloadTypeInTagsMatcher(disableTmVersionInc); @@ -1192,7 +1195,7 @@ bool NamespaceImpl::updateIndex(const IndexDef& indexDef, bool disableTmVersionI if (indexDef.IsEqual(foundIndex, IndexComparison::SkipConfig)) { // Index has not been changed - if (!indexDef.IsEqual(foundIndex, IndexComparison::WithConfig)) { + if (!indexDef.IsEqual(foundIndex, IndexComparison::Full)) { // Only index config changed // Just call SetOpts auto idxPtr = indexes_[getIndexByName(indexName)].get(); @@ -1205,8 +1208,10 @@ bool NamespaceImpl::updateIndex(const IndexDef& indexDef, bool disableTmVersionI } verifyUpdateIndex(indexDef); - dropIndex(indexDef, disableTmVersionInc); - addIndex(indexDef, disableTmVersionInc); + if (!IndexFastUpdate::Try(*this, foundIndex, indexDef)) { + dropIndex(indexDef, disableTmVersionInc); + addIndex(indexDef, disableTmVersionInc); + } return true; } @@ -1305,7 +1310,7 @@ bool NamespaceImpl::checkIfSameIndexExists(const IndexDef& indexDef, bool* requi } oldIndexDef.expireAfter_ = indexDef.expireAfter_; } - if (indexDef.IsEqual(oldIndexDef, IndexComparison::SkipConfig)) { + if (indexDef.IsEqual(oldIndexDef, IndexComparison::BasicCompatibilityOnly)) { return true; } throw Error(errConflict, "Index '%s.%s' already exists with different settings", name_, indexDef.name_); @@ -1913,7 +1918,7 @@ void NamespaceImpl::doModifyItem(Item& item, ItemModifyMode mode, UpdatesContain } for (int field = 1, regularIndexes = indexes_.firstCompositePos(); field < regularIndexes; ++field) { - Index& index = *indexes_[field]; + const Index& index = *indexes_[field]; if (index.Opts().GetCollateMode() == CollateUTF8 && index.KeyType().Is()) { if (index.Opts().IsSparse()) { assertrx(index.Fields().getTagsPathsLength() > 0); @@ -1931,7 +1936,7 @@ void NamespaceImpl::doModifyItem(Item& item, ItemModifyMode mode, UpdatesContain if 
(suggestedId >= 0 && exists && suggestedId != realItem.first) { throw Error(errParams, "Suggested ID doesn't correspond to real ID: %d vs %d", suggestedId, realItem.first); } - const IdType id = exists ? realItem.first : createItem(newPl.RealSize(), suggestedId); + const IdType id = exists ? realItem.first : createItem(newPl.RealSize(), suggestedId, ctx); replicateTmUpdateIfRequired(pendedRepl, oldTmV, ctx); lsn_t lsn; @@ -2495,8 +2500,8 @@ NamespaceMemStat NamespaceImpl::GetMemStat(const RdxContext& ctx) { NamespaceMemStat ret; auto rlck = rLock(ctx); ret.name = name_; - ret.joinCache = joinCache_->GetMemStat(); - ret.queryCache = queryCountCache_->GetMemStat(); + ret.joinCache = joinCache_.GetMemStat(); + ret.queryCache = queryCountCache_.GetMemStat(); ret.itemsCount = itemsCount(); *(static_cast(&ret.replication)) = getReplState(); @@ -2567,6 +2572,9 @@ NamespacePerfStat NamespaceImpl::GetPerfStat(const RdxContext& ctx) { ret.name = name_; ret.selects = selectPerfCounter_.Get(); ret.updates = updatePerfCounter_.Get(); + ret.joinCache = joinCache_.GetPerfStat(); + ret.queryCountCache = queryCountCache_.GetPerfStat(); + ret.indexes.reserve(indexes_.size() - 1); for (unsigned i = 1; i < indexes_.size(); i++) { ret.indexes.emplace_back(indexes_[i]->GetIndexPerfStat()); } @@ -2580,6 +2588,8 @@ void NamespaceImpl::ResetPerfStat(const RdxContext& ctx) { for (auto& i : indexes_) { i->ResetIndexPerfStat(); } + queryCountCache_.ResetPerfStat(); + joinCache_.ResetPerfStat(); } Error NamespaceImpl::loadLatestSysRecord(std::string_view baseSysTag, uint64_t& version, std::string& content) { @@ -2664,6 +2674,7 @@ bool NamespaceImpl::loadIndexesFromStorage() { throw status; } std::string_view schemaStr = schema_->GetJSON(); + // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage) schemaStr = std::string_view(schemaStr.data(), std::min(schemaStr.size(), kMaxSchemaCharsToPrint)); logPrintf(LogInfo, "Loaded schema(version: %lld) of the namespace '%s'. 
First %d symbols of the schema are: '%s'", sysRecordsVersions_.schemaVersion ? sysRecordsVersions_.schemaVersion - 1 : 0, name_, schemaStr.size(), schemaStr); @@ -2958,6 +2969,21 @@ void NamespaceImpl::SetTagsMatcher(TagsMatcher&& tm, const RdxContext& ctx) { replicate(std::move(pendedRepl), std::move(wlck), true, nullptr, ctx); } +void NamespaceImpl::RebuildFreeItemsStorage(const RdxContext& ctx) { + std::vector newFree; + auto wlck = simpleWLock(ctx); + + if (!getReplState().temporary) { + throw Error(errLogic, "Unexpected manual free items rebuild on non-temporary namespace"); + } + for (IdType i = 0, sz = IdType(items_.size()); i < sz; ++i) { + if (items_[i].IsFree()) { + newFree.emplace_back(i); + } + } + free_ = std::move(newFree); +} + void NamespaceImpl::LoadFromStorage(unsigned threadsCount, const RdxContext& ctx) { auto wlck = simpleWLock(ctx); FlagGuardT nsLoadingGuard(nsIsLoading_); @@ -3047,6 +3073,7 @@ void NamespaceImpl::removeExpiredStrings(RdxActivityContext* ctx) { void NamespaceImpl::setSchema(std::string_view schema, UpdatesContainer& pendedRepl, const NsContext& ctx) { using namespace std::string_view_literals; + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) std::string_view schemaPrint(schema.data(), std::min(schema.size(), kMaxSchemaCharsToPrint)); std::string_view source = "user"sv; if (ctx.inSnapshot) { @@ -3225,7 +3252,7 @@ std::vector NamespaceImpl::EnumMeta(const RdxContext& ctx) { std::vector NamespaceImpl::enumMeta() const { std::vector keys(meta_.size()); - transform(meta_.begin(), meta_.end(), keys.begin(), [](auto pair) { return pair.first; }); + transform(meta_.begin(), meta_.end(), keys.begin(), [](const auto& pair) { return pair.first; }); return keys; } @@ -3284,7 +3311,7 @@ void NamespaceImpl::updateSortedIdxCount() { scheduleIndexOptimization(IndexOptimization::Full); } -IdType NamespaceImpl::createItem(size_t realSize, IdType suggestedId) { +IdType NamespaceImpl::createItem(size_t realSize, IdType suggestedId, 
const NsContext& ctx) { IdType id = 0; if (suggestedId < 0) { if (!free_.empty()) { @@ -3301,26 +3328,16 @@ IdType NamespaceImpl::createItem(size_t realSize, IdType suggestedId) { items_.emplace_back(PayloadValue(realSize)); } } else { + if (!ctx.IsForceSyncItem()) { + throw Error(errParams, "Suggested ID should only be used during force-sync replication: %d", suggestedId); + } id = suggestedId; - auto found = std::find(free_.begin(), free_.end(), id); - if (found == free_.end()) { - if (items_.size() > size_t(id)) { - if (!items_[size_t(id)].IsFree()) { - throw Error(errParams, "Suggested ID %d is not empty", id); - } - } else { - auto sz = items_.size(); - items_.resize(size_t(id) + 1); - free_.reserve(free_.size() + items_.size() - sz); - while (sz < items_.size() - 1) { - free_.push_back(sz++); - } - items_[id] = PayloadValue(realSize); + if (size_t(id) < items_.size()) { + if (!items_[size_t(id)].IsFree()) { + throw Error(errParams, "Suggested ID %d is not empty", id); } } else { - free_.erase(found); - assertrx(id < IdType(items_.size())); - assertrx(items_[id].IsFree()); + items_.resize(size_t(id) + 1); items_[id] = PayloadValue(realSize); } } @@ -3366,8 +3383,8 @@ int64_t NamespaceImpl::GetSerial(const std::string& field, UpdatesContainer& rep } void NamespaceImpl::FillResult(LocalQueryResults& result, const IdSet& ids) const { - for (auto& id : ids) { - result.Add({id, items_[id], 0, 0}); + for (auto id : ids) { + result.AddItemRef(id, items_[id], 0, 0); } } @@ -3388,11 +3405,11 @@ void NamespaceImpl::getFromJoinCache(const Query& q, JoinCacheRes& out) const { } void NamespaceImpl::getFromJoinCacheImpl(JoinCacheRes& ctx) const { - auto it = joinCache_->Get(ctx.key); + auto it = joinCache_.Get(ctx.key); ctx.needPut = false; ctx.haveData = false; if (it.valid) { - if (!it.val.inited) { + if (!it.val.IsInitialized()) { ctx.needPut = true; } else { ctx.haveData = true; @@ -3413,11 +3430,11 @@ void NamespaceImpl::putToJoinCache(JoinCacheRes& res, 
JoinPreResult::CPtr preRes res.needPut = false; joinCacheVal.inited = true; joinCacheVal.preResult = std::move(preResult); - joinCache_->Put(res.key, std::move(joinCacheVal)); + joinCache_.Put(res.key, std::move(joinCacheVal)); } void NamespaceImpl::putToJoinCache(JoinCacheRes& res, JoinCacheVal&& val) const { val.inited = true; - joinCache_->Put(res.key, std::move(val)); + joinCache_.Put(res.key, std::move(val)); } const FieldsSet& NamespaceImpl::pkFields() { diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index 83eb97b2c..7678a6000 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include "asyncstorage.h" #include "cluster/idatareplicator.h" @@ -414,6 +413,7 @@ class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*perfor storage_.Flush(flushOpts); return storage_.GetStatusCached().err; } + void RebuildFreeItemsStorage(const RdxContext& ctx); private: struct SysRecordsVersions { @@ -429,6 +429,8 @@ class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*perfor lsn_t lsn; }; + friend struct IndexFastUpdate; + Error rebuildIndexesTagsPaths(const TagsMatcher& newTm); ReplicationState getReplState() const; std::string sysRecordName(std::string_view sysTag, uint64_t version); @@ -579,7 +581,7 @@ class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*perfor NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& storageLock); bool isSystem() const noexcept { return isSystemNamespaceNameFast(name_); } - IdType createItem(size_t realSize, IdType suggestedId); + IdType createItem(size_t realSize, IdType suggestedId, const NsContext& ctx); void processWalRecord(WALRecord&& wrec, const NsContext& ctx, lsn_t itemLsn = lsn_t(), Item* item = nullptr); void replicateAsync(updates::UpdateRecord&& rec, const RdxContext& ctx); @@ -633,8 +635,8 @@ class NamespaceImpl 
final : public intrusive_atomic_rc_base { // NOLINT(*perfor std::atomic_bool enablePerfCounters_{false}; NamespaceConfigData config_; - std::unique_ptr queryCountCache_; - std::unique_ptr joinCache_; + QueryCountCache queryCountCache_; + JoinCache joinCache_; // Replication variables WALTracker wal_; ReplicationState repl_; diff --git a/cpp_src/core/namespace/namespacestat.cc b/cpp_src/core/namespace/namespacestat.cc index 6f1581508..3d5a037f9 100644 --- a/cpp_src/core/namespace/namespacestat.cc +++ b/cpp_src/core/namespace/namespacestat.cc @@ -1,5 +1,4 @@ #include "namespacestat.h" - #include "core/cjson/jsonbuilder.h" #include "gason/gason.h" @@ -23,7 +22,6 @@ void NamespaceMemStat::GetJSON(WrSerializer& ser) { builder.Put("storage_enabled", storageEnabled); builder.Put("storage_path", storagePath); - builder.Put("storage_loaded", storageLoaded); builder.Put("optimization_completed", optimizationCompleted); builder.Object("total") @@ -73,8 +71,8 @@ void IndexMemStat::GetJSON(JsonBuilder& builder) { if (trackedUpdatesSize) { builder.Put("tracked_updates_size", trackedUpdatesSize); } - if (trackedUpdatesOveflow) { - builder.Put("tracked_updates_overflow", trackedUpdatesOveflow); + if (trackedUpdatesOverflow) { + builder.Put("tracked_updates_overflow", trackedUpdatesOverflow); } if (dataSize) { builder.Put("data_size", dataSize); @@ -131,6 +129,14 @@ void NamespacePerfStat::GetJSON(WrSerializer& ser) { auto obj = builder.Object("transactions"); transactions.GetJSON(obj); } + if (queryCountCache.state != LRUCachePerfStat::State::DoesNotExist) { + auto obj = builder.Object("query_count_cache"); + queryCountCache.GetJSON(obj); + } + if (joinCache.state != LRUCachePerfStat::State::DoesNotExist) { + auto obj = builder.Object("join_cache"); + joinCache.GetJSON(obj); + } auto arr = builder.Array("indexes"); @@ -150,6 +156,10 @@ void IndexPerfStat::GetJSON(JsonBuilder& builder) { auto obj = builder.Object("commits"); commits.GetJSON(obj); } + if (cache.state != 
LRUCachePerfStat::State::DoesNotExist) { + auto obj = builder.Object("cache"); + cache.GetJSON(obj); + } } static bool LoadLsn(lsn_t& to, const gason::JsonNode& node) { @@ -241,6 +251,29 @@ void TxPerfStat::GetJSON(JsonBuilder& builder) { builder.Put("max_copy_time_us", maxCopyTimeUs); } +void LRUCachePerfStat::GetJSON(JsonBuilder& builder) { + switch (state) { + case State::DoesNotExist: + return; + case State::Inactive: + builder.Put("is_active", false); + break; + case State::Active: + builder.Put("is_active", true); + break; + } + + builder.Put("total_queries", TotalQueries()); + builder.Put("cache_hit_rate", HitRate()); +} + +uint64_t LRUCachePerfStat::TotalQueries() const noexcept { return hits + misses; } + +double LRUCachePerfStat::HitRate() const noexcept { + const auto tq = TotalQueries(); + return tq ? (double(hits) / double(tq)) : 0.0; +} + static constexpr std::string_view nsClusterizationRoleToStr(ClusterizationStatus::Role role) noexcept { switch (role) { case ClusterizationStatus::Role::ClusterReplica: diff --git a/cpp_src/core/namespace/namespacestat.h b/cpp_src/core/namespace/namespacestat.h index c87020bbf..ca6d50465 100644 --- a/cpp_src/core/namespace/namespacestat.h +++ b/cpp_src/core/namespace/namespacestat.h @@ -36,7 +36,7 @@ struct IndexMemStat { size_t trackedUpdatesCount = 0; size_t trackedUpdatesBuckets = 0; size_t trackedUpdatesSize = 0; - size_t trackedUpdatesOveflow = 0; + size_t trackedUpdatesOverflow = 0; LRUCacheMemStat idsetCache; size_t GetIndexStructSize() const noexcept { return idsetPlainSize + idsetBTreeSize + sortOrdersSize + fulltextSize + columnSize + trackedUpdatesSize; @@ -71,7 +71,9 @@ struct ClusterizationStatus { struct ReplicationState { enum class Status { None, Idle, Error, Fatal, Syncing }; - void GetJSON(JsonBuilder& builder); + virtual ~ReplicationState() = default; + + virtual void GetJSON(JsonBuilder& builder); void FromJSON(span); // LSN of last change @@ -115,8 +117,9 @@ struct ReplicationStateV2 { 
ClusterizationStatus clusterStatus; }; -struct ReplicationStat : public ReplicationState { - void GetJSON(JsonBuilder& builder); +struct ReplicationStat final : public ReplicationState { + void GetJSON(JsonBuilder& builder) override; + size_t walCount = 0; size_t walSize = 0; int16_t serverId = 0; @@ -130,7 +133,6 @@ struct NamespaceMemStat { bool storageOK = false; bool storageEnabled = false; std::string storageStatus; - bool storageLoaded = true; bool optimizationCompleted = false; size_t itemsCount = 0; size_t emptyItemsCount = 0; @@ -147,6 +149,18 @@ struct NamespaceMemStat { std::vector indexes; }; +struct LRUCachePerfStat { + enum class State { DoesNotExist, Active, Inactive }; + + void GetJSON(JsonBuilder& builder); + uint64_t TotalQueries() const noexcept; + double HitRate() const noexcept; + + State state = State::DoesNotExist; + uint64_t hits = 0; + uint64_t misses = 0; +}; + struct PerfStat { void GetJSON(JsonBuilder& builder); @@ -182,13 +196,14 @@ struct TxPerfStat { struct IndexPerfStat { IndexPerfStat() = default; - IndexPerfStat(const std::string& n, const PerfStat& s, const PerfStat& c) : name(n), selects(s), commits(c) {} + IndexPerfStat(const std::string& n, PerfStat&& s, PerfStat&& c) : name(n), selects(std::move(s)), commits(std::move(c)) {} void GetJSON(JsonBuilder& builder); std::string name; PerfStat selects; PerfStat commits; + LRUCachePerfStat cache; }; struct NamespacePerfStat { @@ -199,6 +214,8 @@ struct NamespacePerfStat { PerfStat selects; TxPerfStat transactions; std::vector indexes; + LRUCachePerfStat joinCache; + LRUCachePerfStat queryCountCache; }; } // namespace reindexer diff --git a/cpp_src/core/namespace/snapshot/snapshot.cc b/cpp_src/core/namespace/snapshot/snapshot.cc index e36601e78..85123cbfa 100644 --- a/cpp_src/core/namespace/snapshot/snapshot.cc +++ b/cpp_src/core/namespace/snapshot/snapshot.cc @@ -212,7 +212,7 @@ SnapshotChunk Snapshot::Iterator::Chunk() const { chunk.MarkShallow(shallow); chunk.MarkWAL(wal); 
chunk.MarkTx(chunks[idx].txChunk); - chunk.MarkLast(idx == sn_->Size() - 1); + chunk.MarkLast(idx_ == sn_->Size() - 1); return chunk; } diff --git a/cpp_src/core/namespace/stringsholder.h b/cpp_src/core/namespace/stringsholder.h index a0fa9cd68..511cfa9ef 100644 --- a/cpp_src/core/namespace/stringsholder.h +++ b/cpp_src/core/namespace/stringsholder.h @@ -3,6 +3,7 @@ #include #include #include "core/keyvalue/key_string.h" +#include "estl/intrusive_ptr.h" namespace reindexer { @@ -14,22 +15,22 @@ class StringsHolder : private std::vector { public: ~StringsHolder(); void Add(key_string&& str, size_t strSize) { - memStat_ += sizeof(Base::value_type) + strSize; Base::emplace_back(std::move(str)); + memStat_ += sizeof(Base::value_type) + strSize; } void Add(key_string&& str) { - memStat_ += sizeof(Base::value_type) + sizeof(*str.get()) + str->heap_size(); - Base::emplace_back(std::move(str)); + auto& s = Base::emplace_back(std::move(str)); + memStat_ += sizeof(Base::value_type) + s.heap_size(); } void Add(const key_string& str) { - memStat_ += sizeof(Base::value_type) + sizeof(*str.get()) + str->heap_size(); - Base::push_back(str); + auto& s = Base::emplace_back(str); + memStat_ += sizeof(Base::value_type) + s.heap_size(); } void Add(std::unique_ptr&&); template void emplace_back(T&&... 
v) { - Base::emplace_back(std::forward(v)...); - memStat_ += sizeof(Base::value_type) + sizeof(*back().get()) + back()->heap_size(); + auto& str = Base::emplace_back(std::forward(v)...); + memStat_ += sizeof(key_string) + str.heap_size(); } void Clear() noexcept; size_t MemStat() const noexcept { return memStat_; } diff --git a/cpp_src/core/namespacedef.h b/cpp_src/core/namespacedef.h index 04ca99853..6599125e8 100644 --- a/cpp_src/core/namespacedef.h +++ b/cpp_src/core/namespacedef.h @@ -18,7 +18,7 @@ class Namespace; struct NamespaceDef { NamespaceDef() = default; - NamespaceDef(std::string iname, StorageOpts istorage = StorageOpts().Enabled().CreateIfMissing()) + explicit NamespaceDef(std::string iname, StorageOpts istorage = StorageOpts().Enabled().CreateIfMissing()) : name(std::move(iname)), storage(istorage) {} NamespaceDef& AddIndex(const std::string& iname, const std::string& indexType, const std::string& fieldType, diff --git a/cpp_src/core/nsselecter/btreeindexiterator.h b/cpp_src/core/nsselecter/btreeindexiterator.h index 6365f922a..c87f62d15 100644 --- a/cpp_src/core/nsselecter/btreeindexiterator.h +++ b/cpp_src/core/nsselecter/btreeindexiterator.h @@ -53,19 +53,37 @@ class BtreeIndexIterator final : public IndexIterator { return impl_->getValue(); } size_t GetMaxIterations(size_t limitIters) noexcept final { - if (cachedIters_ != std::numeric_limits::max()) { - return cachedIters_; + auto limit = std::min(kMaxBTreeIterations, limitIters); + if (!cachedIters_.Valid(limit)) { + auto [iters, fullyScanned] = BtreeIndexForwardIteratorImpl(idxMap_, first_, last_).getMaxIterations(limit); + + if (iters >= kMaxBTreeIterations && !fullyScanned) { + cachedIters_ = CachedIters{std::numeric_limits::max(), true}; + } else if (fullyScanned || iters > cachedIters_.value || cachedIters_.value == std::numeric_limits::max()) { + cachedIters_ = CachedIters{iters, fullyScanned}; + } } - return BtreeIndexForwardIteratorImpl(idxMap_, first_, 
last_).getMaxIterations(limitIters); + + return std::min(cachedIters_.value, limitIters); } - void SetMaxIterations(size_t iters) noexcept final { cachedIters_ = iters; } + void SetMaxIterations(size_t iters) noexcept final { cachedIters_ = CachedIters{iters, true}; } private: + static constexpr size_t kMaxBTreeIterations = 200'000; + std::shared_ptr> impl_; const T& idxMap_; const typename T::const_iterator first_; const typename T::const_iterator last_; - size_t cachedIters_ = std::numeric_limits::max(); + + struct CachedIters { + bool Valid(size_t limitIters) const noexcept { + return fullyScanned || (limitIters <= value && value != std::numeric_limits::max()); + } + + size_t value = std::numeric_limits::max(); + bool fullyScanned = false; + } cachedIters_; }; } // namespace reindexer diff --git a/cpp_src/core/nsselecter/btreeindexiteratorimpl.h b/cpp_src/core/nsselecter/btreeindexiteratorimpl.h index c1e340911..828b7312f 100644 --- a/cpp_src/core/nsselecter/btreeindexiteratorimpl.h +++ b/cpp_src/core/nsselecter/btreeindexiteratorimpl.h @@ -182,9 +182,10 @@ class BtreeIndexForwardIteratorImpl : public BtreeIndexIteratorImpl { size_t getBtreeIdsetSize() const noexcept override { return static_cast(this->idxMapIt_->second.Unsorted()).set_->size(); } - size_t getMaxIterations(size_t limitIters) noexcept { + std::pair getMaxIterations(size_t limitIters) noexcept { size_t cnt = 0; - for (auto it = idxMapItBegin_; cnt < limitIters && it != idxMapItEnd_; ++it) { + auto it = idxMapItBegin_; + for (; cnt < limitIters && it != idxMapItEnd_; ++it) { this->detectCurrentIdsetType(it->second.Unsorted()); switch (this->currentIdsetType_) { case Base::IdsetType::Btree: @@ -197,7 +198,7 @@ class BtreeIndexForwardIteratorImpl : public BtreeIndexIteratorImpl { std::abort(); } } - return cnt; + return {cnt, it == idxMapItEnd_}; } private: diff --git a/cpp_src/core/nsselecter/comparator/comparator_indexed.h b/cpp_src/core/nsselecter/comparator/comparator_indexed.h index 
d50a39604..fb8de9643 100644 --- a/cpp_src/core/nsselecter/comparator/comparator_indexed.h +++ b/cpp_src/core/nsselecter/comparator/comparator_indexed.h @@ -29,7 +29,7 @@ template struct ValuesHolder { struct Type { Type() noexcept = default; - Type(key_string v) noexcept : value_{std::move(v)}, valueView_{value_ ? std::string_view{*value_} : std::string_view{}} {} + Type(key_string v) noexcept : value_{std::move(v)}, valueView_{value_} {} Type(const Type& other) noexcept : Type{other.value_} {} Type(Type&& other) noexcept : Type{std::move(other.value_)} { other.valueView_ = {}; } Type& operator=(const Type& other) noexcept { diff --git a/cpp_src/core/nsselecter/comparator/equalposition_comparator_impl.h b/cpp_src/core/nsselecter/comparator/equalposition_comparator_impl.h index c3bed1e8a..38b4ee98e 100644 --- a/cpp_src/core/nsselecter/comparator/equalposition_comparator_impl.h +++ b/cpp_src/core/nsselecter/comparator/equalposition_comparator_impl.h @@ -217,7 +217,7 @@ class EqualPositionComparatorTypeImpl { return collateCompare(std::string_view(lhs), rhs, collate_) == ComparationResult::Gt; case CondRange: return (collateCompare(std::string_view(lhs), rhs, collate_) & ComparationResult::Ge) && - (collateCompare(std::string_view(lhs), std::string_view(*values_[1]), collate_) & ComparationResult::Le); + (collateCompare(std::string_view(lhs), std::string_view(values_[1]), collate_) & ComparationResult::Le); case CondSet: return valuesS_.find(std::string_view(lhs)) != valuesS_.end(); case CondAllSet: { @@ -263,7 +263,7 @@ class EqualPositionComparatorTypeImpl { } else { values_.emplace_back(value); if (values_.size() == 1) { - cachedValueSV_ = std::string_view(*values_[0]); + cachedValueSV_ = std::string_view(values_[0]); } } } diff --git a/cpp_src/core/nsselecter/joinedselector.cc b/cpp_src/core/nsselecter/joinedselector.cc index 6fed45c84..20c79a1a9 100644 --- a/cpp_src/core/nsselecter/joinedselector.cc +++ b/cpp_src/core/nsselecter/joinedselector.cc @@ -21,9 +21,9 
@@ void JoinedSelector::selectFromRightNs(LocalQueryResults& joinItemR, const Query rightNs_->putToJoinCache(joinRes_, preSelectCtx_.ResultPtr()); } if (joinResLong.haveData) { - found = joinResLong.it.val.ids_->size(); + found = joinResLong.it.val.ids->size(); matchedAtLeastOnce = joinResLong.it.val.matchedAtLeastOnce; - rightNs_->FillResult(joinItemR, *joinResLong.it.val.ids_); + rightNs_->FillResult(joinItemR, *joinResLong.it.val.ids); } else { SelectCtxWithJoinPreSelect ctx(query, nullptr, preSelectCtx_); ctx.matchedAtLeastOnce = false; @@ -41,10 +41,10 @@ void JoinedSelector::selectFromRightNs(LocalQueryResults& joinItemR, const Query } if (joinResLong.needPut) { JoinCacheVal val; - val.ids_ = make_intrusive>(); + val.ids = make_intrusive>(); val.matchedAtLeastOnce = matchedAtLeastOnce; for (auto& r : joinItemR.Items()) { - val.ids_->Add(r.Id(), IdSet::Unordered, 0); + val.ids->Add(r.Id(), IdSet::Unordered, 0); } rightNs_->putToJoinCache(joinResLong, std::move(val)); } @@ -64,7 +64,7 @@ void JoinedSelector::selectFromPreResultValues(LocalQueryResults& joinItemR, con break; } found = true; - joinItemR.Add(item); + joinItemR.AddItemRef(item); } } matchedAtLeastOnce = matched; @@ -200,7 +200,7 @@ void JoinedSelector::AppendSelectIteratorOfJoinIndexData(SelectIteratorContainer assertrx_throw(!IsFullText(leftIndex->Type())); // Avoiding to use 'GetByJsonPath' during values extraction - // TODO: Sometimes this substituition may be effective even with 'GetByJsonPath', so we should allow user to hint this optimization. + // TODO: Sometimes this substitution may be effective even with 'GetByJsonPath', so we should allow user to hint this optimization. 
bool hasSparse = false; for (int field : joinEntry.RightFields()) { if (field == SetByJsonPath) { @@ -214,14 +214,14 @@ void JoinedSelector::AppendSelectIteratorOfJoinIndexData(SelectIteratorContainer const VariantArray values = std::visit(overloaded{[&](const IdSet& preselected) { - const std::vector* sortOrderes = nullptr; + const std::vector* sortOrders = nullptr; if (preresult.sortOrder.index) { - sortOrderes = &(preresult.sortOrder.index->SortOrders()); + sortOrders = &(preresult.sortOrder.index->SortOrders()); } return readValuesOfRightNsFrom( preselected, - [this, sortOrderes](IdType rowId) noexcept { - const auto properRowId = sortOrderes ? (*sortOrderes)[rowId] : rowId; + [this, sortOrders](IdType rowId) noexcept { + const auto properRowId = sortOrders ? (*sortOrders)[rowId] : rowId; return ConstPayload{rightNs_->payloadType_, rightNs_->items_[properRowId]}; }, joinEntry, rightNs_->payloadType_); diff --git a/cpp_src/core/nsselecter/nsselecter.cc b/cpp_src/core/nsselecter/nsselecter.cc index 302527da0..f98cd4b50 100644 --- a/cpp_src/core/nsselecter/nsselecter.cc +++ b/cpp_src/core/nsselecter/nsselecter.cc @@ -53,9 +53,9 @@ void NsSelecter::operator()(LocalQueryResults& result, SelectCtxWithJoinPreSelec if (aggregationQueryRef.CalcTotal() == ModeCachedTotal || containAggCountCached) { ckey = QueryCacheKey{ctx.query, kCountCachedKeyMode, ctx.joinedSelectors}; - auto cached = ns_->queryCountCache_->Get(ckey); - if (cached.valid && cached.val.total_count >= 0) { - result.totalCount += cached.val.total_count; + auto cached = ns_->queryCountCache_.Get(ckey); + if (cached.valid && cached.val.IsInitialized()) { + result.totalCount += cached.val.totalCount; if (logLevel >= LogTrace) { logPrintf(LogInfo, "[%s] using total count value from cache: %d", ns_->name_, result.totalCount); } @@ -280,6 +280,7 @@ void NsSelecter::operator()(LocalQueryResults& result, SelectCtxWithJoinPreSelec if (!qres.HasIdsets()) { SelectKeyResult scan; + std::string_view scanName = 
"-scan"; if (ctx.sortingContext.isOptimizationEnabled()) { auto it = ns_->indexes_[ctx.sortingContext.uncommitedIndex]->CreateIterator(); maxIterations = ns_->itemsCount(); @@ -287,17 +288,35 @@ void NsSelecter::operator()(LocalQueryResults& result, SelectCtxWithJoinPreSelec scan.emplace_back(std::move(it)); } else { // special case - no idset in query - IdType limit = ns_->items_.size(); - if (ctx.sortingContext.isIndexOrdered() && ctx.sortingContext.enableSortOrders) { - const Index* index = ctx.sortingContext.sortIndex(); - assertrx_throw(index); - limit = index->SortOrders().size(); + const auto itemsInUse = ns_->items_.size() - ns_->free_.size(); + const bool haveLotOfFree = (ns_->free_.size() > 200'000) && (ns_->free_.size() > (4 * itemsInUse)); + const bool useSortOrders = ctx.sortingContext.isIndexOrdered() && ctx.sortingContext.enableSortOrders; + if (haveLotOfFree && !useSortOrders) { + // Attempt to improve selection time by using dense IdSet, when there are a lot of empty items in namespace + scanName = "-scan-dns"; + base_idset denseSet; + denseSet.reserve(itemsInUse); + for (IdType i = 0, sz = IdType(ns_->items_.size()); i < sz; ++i) { + if (!ns_->items_[i].IsFree()) { + denseSet.emplace_back(i); + } + } + scan.emplace_back(make_intrusive>(std::move(denseSet))); + maxIterations = itemsInUse; + } else { + // Use ids range + IdType limit = ns_->items_.size(); + if (useSortOrders) { + const Index* index = ctx.sortingContext.sortIndex(); + assertrx_throw(index); + limit = index->SortOrders().size(); + } + scan.emplace_back(0, limit); + maxIterations = limit; } - scan.emplace_back(0, limit); - maxIterations = limit; } // Iterator Field Kind: -scan. Sorting Context! 
-> None - qres.AppendFront(OpAnd, std::move(scan), false, "-scan", IteratorFieldKind::None, true); + qres.AppendFront(OpAnd, std::move(scan), false, std::string(scanName), IteratorFieldKind::None, true); } // Get maximum iterations count, for right calculation comparators costs qres.SortByCost(maxIterations); @@ -446,7 +465,7 @@ void NsSelecter::operator()(LocalQueryResults& result, SelectCtxWithJoinPreSelec if rx_unlikely (logLevel >= LogTrace) { logPrintf(LogInfo, "[%s] put totalCount value into query cache: %d ", ns_->name_, result.totalCount); } - ns_->queryCountCache_->Put(ckey, {static_cast(result.totalCount - initTotalCount)}); + ns_->queryCountCache_.Put(ckey, {static_cast(result.totalCount - initTotalCount)}); } if constexpr (std::is_same_v) { if rx_unlikely (logLevel >= LogTrace) { @@ -641,8 +660,10 @@ class ForcedMapInserter { } else if (iter->second != cost_ - 1) { static constexpr auto errMsg = "Forced sort value '%s' is duplicated. Deduplicated by the first occurrence."; if constexpr (std::is_same_v) { + // NOLINTNEXTLINE (bugprone-use-after-move,-warnings-as-errors) logPrintf(LogInfo, errMsg, value.template As()); } else { + // NOLINTNEXTLINE (bugprone-use-after-move,-warnings-as-errors) logPrintf(LogInfo, errMsg, Variant{std::forward(value)}.template As()); } } @@ -673,11 +694,8 @@ It NsSelecter::applyForcedSortImpl(NamespaceImpl& ns, It begin, It end, const It VariantArray keyRefs; const auto boundary = std::stable_partition(begin, end, [&](const ItemRef& itemRef) { valueGetter.Payload(itemRef).Get(idx, keyRefs); - if constexpr (desc) { - return keyRefs.empty() || (sortMap.find(keyRefs[0]) == sortMap.end()); - } else { - return !keyRefs.empty() && (sortMap.find(keyRefs[0]) != sortMap.end()); - } + const auto descOrder = keyRefs.empty() || (sortMap.find(keyRefs[0]) == sortMap.end()); + return desc ? 
descOrder : !descOrder; }); VariantArray lhsItemValue; @@ -948,15 +966,16 @@ void NsSelecter::selectLoop(LoopCtx& ctx, ResultsT& result, co // reserve query results, if we have only 1 condition with 1 idset if (qres.Size() == 1 && qres.IsSelectIterator(0) && qres.Get(0).size() == 1) { - const unsigned reserve = std::min(unsigned(qres.Get(0).GetMaxIterations()), ctx.count); - if constexpr (std::is_same_v) { - if (auto* values = std::get_if(&sctx.preSelect.Result().payload); values) { - values->reserve(reserve + initCount); + if (const size_t reserve = qres.Get(0).GetMaxIterations(ctx.count); reserve < size_t(QueryEntry::kDefaultLimit)) { + if constexpr (std::is_same_v) { + if (auto* values = std::get_if(&sctx.preSelect.Result().payload); values) { + values->reserve(reserve + initCount); + } else { + resultReserve(result, initCount + reserve); + } } else { resultReserve(result, initCount + reserve); } - } else { - resultReserve(result, initCount + reserve); } } @@ -1199,10 +1218,10 @@ void NsSelecter::addSelectResult(uint8_t proc, IdType rowId, IdType properRowId, sctx.preSelect.Result().payload); } else { if (!sctx.sortingContext.expressions.empty()) { - result.Add({properRowId, sctx.sortingContext.exprResults[0].size(), proc, sctx.nsid}); + result.AddItemRef(properRowId, sctx.sortingContext.exprResults[0].size(), proc, sctx.nsid); calculateSortExpressions(proc, rowId, properRowId, sctx, result); } else { - result.Add({properRowId, ns_->items_[properRowId], proc, sctx.nsid}); + result.AddItemRef(properRowId, ns_->items_[properRowId], proc, sctx.nsid); } const int kLimitItems = 10000000; @@ -1520,7 +1539,7 @@ class CostCalculator { if (isInSequence_) { curCost_ += res.GetMaxIterations(totalCost_); } else { - totalCost_ = std::min(totalCost_, res.GetMaxIterations(totalCost_)); + totalCost_ = res.GetMaxIterations(totalCost_); } } }, diff --git a/cpp_src/core/nsselecter/querypreprocessor.cc b/cpp_src/core/nsselecter/querypreprocessor.cc index 5f4961f6c..4f49f8448 100644 
--- a/cpp_src/core/nsselecter/querypreprocessor.cc +++ b/cpp_src/core/nsselecter/querypreprocessor.cc @@ -210,17 +210,17 @@ int QueryPreprocessor::calculateMaxIterations(const size_t from, const size_t to return res; } -void QueryPreprocessor::InjectConditionsFromJoins(JoinedSelectors& js, OnConditionInjections& expalainOnInjections, LogLevel logLevel, +void QueryPreprocessor::InjectConditionsFromJoins(JoinedSelectors& js, OnConditionInjections& explainOnInjections, LogLevel logLevel, bool inTransaction, bool enableSortOrders, const RdxContext& rdxCtx) { h_vector maxIterations(Size()); span maxItersSpan(maxIterations.data(), maxIterations.size()); const int maxIters = calculateMaxIterations(0, Size(), ns_.itemsCount(), maxItersSpan, inTransaction, enableSortOrders, rdxCtx); const bool needExplain = query_.NeedExplain() || logLevel >= LogInfo; if (needExplain) { - injectConditionsFromJoins(0, Size(), js, expalainOnInjections, maxIters, maxIterations, inTransaction, + injectConditionsFromJoins(0, Size(), js, explainOnInjections, maxIters, maxIterations, inTransaction, enableSortOrders, rdxCtx); } else { - injectConditionsFromJoins(0, Size(), js, expalainOnInjections, maxIters, maxIterations, inTransaction, + injectConditionsFromJoins(0, Size(), js, explainOnInjections, maxIters, maxIterations, inTransaction, enableSortOrders, rdxCtx); } assertrx_dbg(maxIterations.size() == Size()); @@ -371,7 +371,7 @@ std::pair QueryPreprocessor::removeAlwaysTrue(size_t begin, size_t } void QueryPreprocessor::Reduce(bool isFt) { - bool changed; + bool changed = false; do { changed = removeBrackets(); changed = LookupQueryIndexes() || changed; @@ -404,9 +404,12 @@ size_t QueryPreprocessor::removeBrackets(size_t begin, size_t end) { if (begin != end && GetOperation(begin) == OpOr) { throw Error{errQueryExec, "OR operator in first condition or after left join"}; } + if (!equalPositions.empty()) { + return 0; + } size_t deleted = 0; for (size_t i = begin; i < end - deleted; i = Next(i)) 
{ - if (!IsSubTree(i)) { + if (!IsSubTree(i) || (Is(i) && !Get(i).equalPositions.empty())) { continue; } deleted += removeBrackets(i + 1, Next(i)); @@ -1487,7 +1490,7 @@ std::pair QueryPreprocessor::queryValuesFromOnCondition( JoinPreResult::CPtr joinPreresult, const QueryJoinEntry& joinEntry, CondType condition, int mainQueryMaxIterations, const RdxContext& rdxCtx) { - size_t limit = 0; + int64_t limit = 0; const auto& rNsCfg = rightNs.config(); if (rNsCfg.maxPreselectSize == 0) { limit = std::max(rNsCfg.minPreselectSize, rightNs.itemsCount() * rNsCfg.maxPreselectPart); @@ -1497,8 +1500,12 @@ std::pair QueryPreprocessor::queryValuesFromOnCondition( limit = std::min(std::max(rNsCfg.minPreselectSize, rightNs.itemsCount() * rNsCfg.maxPreselectPart), rNsCfg.maxPreselectSize); } + constexpr unsigned kExtraLimit = 2; + if (limit < 0 || limit > (std::numeric_limits::max() - kExtraLimit)) { + limit = std::numeric_limits::max() - kExtraLimit; + } joinQuery.Explain(query_.NeedExplain()); - joinQuery.Limit(limit + 2); + joinQuery.Limit(limit + kExtraLimit); joinQuery.Offset(QueryEntry::kDefaultOffset); joinQuery.sortingEntries_.clear(); joinQuery.forcedSortOrder_.clear(); @@ -1535,7 +1542,7 @@ std::pair QueryPreprocessor::queryValuesFromOnCondition( LocalQueryResults qr; SelectCtxWithJoinPreSelect ctx{joinQuery, nullptr, JoinPreResultExecuteCtx{std::move(joinPreresult), mainQueryMaxIterations}}; rightNs.Select(qr, ctx, rdxCtx); - if (ctx.preSelect.Mode() == JoinPreSelectMode::InjectionRejected || qr.Count() > limit) { + if (ctx.preSelect.Mode() == JoinPreSelectMode::InjectionRejected || qr.Count() > size_t(limit)) { return {CondAny, {}}; } assertrx_throw(qr.aggregationResults.size() == 1); diff --git a/cpp_src/core/nsselecter/querypreprocessor.h b/cpp_src/core/nsselecter/querypreprocessor.h index 7c348a13f..7eb8b48d4 100644 --- a/cpp_src/core/nsselecter/querypreprocessor.h +++ b/cpp_src/core/nsselecter/querypreprocessor.h @@ -50,7 +50,7 @@ class QueryPreprocessor : private 
QueryEntries { unsigned Count() const noexcept { return count_; } bool MoreThanOneEvaluation() const noexcept { return queryEntryAddedByForcedSortOptimization_; } bool AvailableSelectBySortIndex() const noexcept { return !queryEntryAddedByForcedSortOptimization_ || !forcedStage(); } - void InjectConditionsFromJoins(JoinedSelectors& js, OnConditionInjections& expalainOnInjections, LogLevel, bool inTransaction, + void InjectConditionsFromJoins(JoinedSelectors& js, OnConditionInjections& explainOnInjections, LogLevel, bool inTransaction, bool enableSortOrders, const RdxContext& rdxCtx); void Reduce(bool isFt); using QueryEntries::Size; diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.cc b/cpp_src/core/nsselecter/selectiteratorcontainer.cc index 33b747a15..6b286ea10 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.cc +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.cc @@ -609,11 +609,12 @@ bool SelectIteratorContainer::prepareIteratorsForSelectLoop(QueryPreprocessor& q template RX_ALWAYS_INLINE bool SelectIteratorContainer::checkIfSatisfyCondition(SelectIterator& it, bool* finish, IdType rowId) { + IdType val = it.Val(); if constexpr (reverse) { - while (it.Val() > rowId && it.Next(rowId)) { + for (; val > rowId && it.Next(rowId); val = it.Val()) { } } else { - while (it.Val() < rowId && it.Next(rowId)) { + for (; val < rowId && it.Next(rowId); val = it.Val()) { } } if (it.End()) { @@ -621,9 +622,9 @@ RX_ALWAYS_INLINE bool SelectIteratorContainer::checkIfSatisfyCondition(SelectIte return false; } if constexpr (reverse) { - return it.Val() >= rowId; + return val >= rowId; } else { - return it.Val() <= rowId; + return val <= rowId; } } @@ -670,14 +671,11 @@ bool SelectIteratorContainer::checkIfSatisfyAllConditions(iterator begin, iterat }, [&] RX_PRE_LMBD_ALWAYS_INLINE(SelectIterator & sit) RX_POST_LMBD_ALWAYS_INLINE { return checkIfSatisfyCondition(sit, &lastFinish, rowId); }, - [&] /*RX_PRE_LMBD_ALWAYS_INLINE*/ (JoinSelectIterator & jit) 
/*RX_POST_LMBD_ALWAYS_INLINE*/ { - return checkIfSatisfyCondition(jit, pv, properRowId, match); - }, + [&] RX_PRE_LMBD_ALWAYS_INLINE(JoinSelectIterator & jit) + RX_POST_LMBD_ALWAYS_INLINE { return checkIfSatisfyCondition(jit, pv, properRowId, match); }, Restricted>{}( - [&pv, properRowId] /*RX_PRE_LMBD_ALWAYS_INLINE*/ (auto& c) /*RX_POST_LMBD_ALWAYS_INLINE*/ { - return c.Compare(pv, properRowId); - }), + [&pv, properRowId] RX_PRE_LMBD_ALWAYS_INLINE(auto& c) RX_POST_LMBD_ALWAYS_INLINE { return c.Compare(pv, properRowId); }), [] RX_PRE_LMBD_ALWAYS_INLINE(AlwaysTrue&) RX_POST_LMBD_ALWAYS_INLINE noexcept { return true; }); if (op == OpOr) { result |= lastResult; @@ -703,30 +701,31 @@ IdType SelectIteratorContainer::getNextItemId(const_iterator begin, const_iterat switch (it->operation) { case OpOr: { auto next = it->Visit( - [it, from](const SelectIteratorsBracket&) { return getNextItemId(it.cbegin(), it.cend(), from); }, - [from](const SelectIterator& sit) { + [it, from] RX_PRE_LMBD_ALWAYS_INLINE(const SelectIteratorsBracket&) + RX_POST_LMBD_ALWAYS_INLINE { return getNextItemId(it.cbegin(), it.cend(), from); }, + [from] RX_PRE_LMBD_ALWAYS_INLINE(const SelectIterator& sit) RX_POST_LMBD_ALWAYS_INLINE { if constexpr (reverse) { if (sit.End()) { return std::numeric_limits::lowest(); } - if (sit.Val() < from) { - return sit.Val() + 1; + if (const auto val = sit.Val(); val < from) { + return val + 1; } } else { if (sit.End()) { return std::numeric_limits::max(); } - if (sit.Val() > from) { - return sit.Val() - 1; + if (const auto val = sit.Val(); val > from) { + return val - 1; } } return from; }, - [from](const OneOf>) { - return from; - }, - [](const AlwaysFalse&) { + [from] RX_PRE_LMBD_ALWAYS_INLINE( + const OneOf>) + RX_POST_LMBD_ALWAYS_INLINE { return from; }, + [] RX_PRE_LMBD_ALWAYS_INLINE(const AlwaysFalse&) RX_POST_LMBD_ALWAYS_INLINE { return reverse ? 
std::numeric_limits::lowest() : std::numeric_limits::max(); }); if constexpr (reverse) { @@ -738,30 +737,31 @@ IdType SelectIteratorContainer::getNextItemId(const_iterator begin, const_iterat case OpAnd: from = result; result = it->Visit( - [it, from](const SelectIteratorsBracket&) { return getNextItemId(it.cbegin(), it.cend(), from); }, - [from](const SelectIterator& sit) { + [it, from] RX_PRE_LMBD_ALWAYS_INLINE(const SelectIteratorsBracket&) + RX_POST_LMBD_ALWAYS_INLINE { return getNextItemId(it.cbegin(), it.cend(), from); }, + [from] RX_PRE_LMBD_ALWAYS_INLINE(const SelectIterator& sit) RX_POST_LMBD_ALWAYS_INLINE { if constexpr (reverse) { if (sit.End()) { return std::numeric_limits::lowest(); } - if (sit.Val() < from) { - return sit.Val() + 1; + if (const auto val = sit.Val(); val < from) { + return val + 1; } } else { if (sit.End()) { return std::numeric_limits::max(); } - if (sit.Val() > from) { - return sit.Val() - 1; + if (const auto val = sit.Val(); val > from) { + return val - 1; } } return from; }, - [from](const OneOf>) { - return from; - }, - [](const AlwaysFalse&) { + [from] RX_PRE_LMBD_ALWAYS_INLINE( + const OneOf>) + RX_POST_LMBD_ALWAYS_INLINE { return from; }, + [] RX_PRE_LMBD_ALWAYS_INLINE(const AlwaysFalse&) RX_POST_LMBD_ALWAYS_INLINE { return reverse ? 
std::numeric_limits::lowest() : std::numeric_limits::max(); }); break; diff --git a/cpp_src/core/parallelexecutor.h b/cpp_src/core/parallelexecutor.h index f1b33eb9f..ba68c3b0a 100644 --- a/cpp_src/core/parallelexecutor.h +++ b/cpp_src/core/parallelexecutor.h @@ -1,5 +1,6 @@ #pragma once +#include #include "cluster/sharding/sharding.h" #include "core/queryresults/queryresults.h" diff --git a/cpp_src/core/payload/fieldsset.h b/cpp_src/core/payload/fieldsset.h index 8130b635b..fd4b070f4 100644 --- a/cpp_src/core/payload/fieldsset.h +++ b/cpp_src/core/payload/fieldsset.h @@ -6,6 +6,8 @@ #include "core/cjson/tagspath.h" #include "core/type_consts.h" #include "estl/h_vector.h" +#include "estl/overloaded.h" +#include "tools/assertrx.h" namespace reindexer { diff --git a/cpp_src/core/payload/payloadfieldtype.cc b/cpp_src/core/payload/payloadfieldtype.cc index a66ce588f..2b5f6ac28 100644 --- a/cpp_src/core/payload/payloadfieldtype.cc +++ b/cpp_src/core/payload/payloadfieldtype.cc @@ -1,5 +1,6 @@ #include "payloadfieldtype.h" #include +#include "core/index/index.h" #include "core/keyvalue/p_string.h" #include "core/keyvalue/uuid.h" #include "estl/one_of.h" @@ -7,6 +8,14 @@ namespace reindexer { +PayloadFieldType::PayloadFieldType(const Index& index, const IndexDef& indexDef) noexcept + : type_(index.KeyType()), + name_(indexDef.name_), + jsonPaths_(indexDef.jsonPaths_), + offset_(0), + isArray_(index.Opts().IsArray()), + arrayDim_(indexDef.Type() == IndexType::IndexRTree ? 
2 : -1) {} + size_t PayloadFieldType::Sizeof() const noexcept { if (IsArray()) { return sizeof(PayloadFieldValue::Array); diff --git a/cpp_src/core/payload/payloadfieldtype.h b/cpp_src/core/payload/payloadfieldtype.h index 438b3563b..5f6af0b06 100644 --- a/cpp_src/core/payload/payloadfieldtype.h +++ b/cpp_src/core/payload/payloadfieldtype.h @@ -5,16 +5,20 @@ namespace reindexer { +class Index; +struct IndexDef; // Type of field class PayloadFieldType { public: + explicit PayloadFieldType(const Index&, const IndexDef&) noexcept; PayloadFieldType(KeyValueType t, std::string n, std::vector j, bool a) noexcept - : type_(t), name_(std::move(n)), jsonPaths_(std::move(j)), offset_(0), isArray_(a) {} + : type_(t), name_(std::move(n)), jsonPaths_(std::move(j)), offset_(0), isArray_(a), arrayDim_(-1) {} size_t Sizeof() const noexcept; size_t ElemSizeof() const noexcept; size_t Alignof() const noexcept; bool IsArray() const noexcept { return isArray_; } + int8_t ArrayDim() const noexcept { return arrayDim_; } void SetArray() noexcept { isArray_ = true; } void SetOffset(size_t o) noexcept { offset_ = o; } size_t Offset() const noexcept { return offset_; } @@ -32,6 +36,7 @@ class PayloadFieldType { std::vector jsonPaths_; size_t offset_; bool isArray_; + int8_t arrayDim_; }; } // namespace reindexer diff --git a/cpp_src/core/payload/payloadiface.cc b/cpp_src/core/payload/payloadiface.cc index 77dbd276a..887c7591a 100644 --- a/cpp_src/core/payload/payloadiface.cc +++ b/cpp_src/core/payload/payloadiface.cc @@ -556,16 +556,19 @@ template void PayloadIface::AddRefStrings(int field) noexcept { auto& f = t_.Field(field); assertrx(f.Type().template Is()); + auto vptr = v_->Ptr(); // direct payloadvalue manipulation for speed optimize if (!f.IsArray()) { - auto str = *reinterpret_cast((v_->Ptr() + f.Offset())); - key_string_add_ref(const_cast(str.getCxxstr())); + auto str = *reinterpret_cast((vptr + f.Offset())); + key_string_impl::addref_unsafe(str.getBaseKeyString()); } else { - auto 
arr = reinterpret_cast(v_->Ptr() + f.Offset()); - for (int i = 0; i < arr->len; i++) { - auto str = *reinterpret_cast(v_->Ptr() + arr->offset + i * t_.Field(field).ElemSizeof()); - key_string_add_ref(const_cast(str.getCxxstr())); + const auto elemSize = f.ElemSizeof(); + auto arr = reinterpret_cast(vptr + f.Offset()); + const auto arrOffset = arr->offset; + for (int i = 0, arrLen = arr->len; i < arrLen; ++i) { + auto str = reinterpret_cast(vptr + arrOffset + i * elemSize); + key_string_impl::addref_unsafe(str->getBaseKeyString()); } } } @@ -581,16 +584,19 @@ template void PayloadIface::ReleaseStrings(int field) noexcept { auto& f = t_.Field(field); assertrx(f.Type().template Is()); + auto vptr = v_->Ptr(); // direct payloadvalue manipulation for speed optimize if (!f.IsArray()) { - auto str = *reinterpret_cast((v_->Ptr() + f.Offset())); - key_string_release(const_cast(str.getCxxstr())); + auto str = reinterpret_cast((vptr + f.Offset())); + key_string_impl::release_unsafe(str->getBaseKeyString()); } else { - auto arr = reinterpret_cast(v_->Ptr() + f.Offset()); - for (int i = 0; i < arr->len; i++) { - auto str = *reinterpret_cast(v_->Ptr() + arr->offset + i * t_.Field(field).ElemSizeof()); - key_string_release(const_cast(str.getCxxstr())); + const auto elemSize = f.ElemSizeof(); + auto arr = reinterpret_cast(vptr + f.Offset()); + const auto arrOffset = arr->offset; + for (int i = 0, arrLen = arr->len; i < arrLen; ++i) { + auto str = reinterpret_cast(vptr + arrOffset + i * elemSize); + key_string_impl::release_unsafe(str->getBaseKeyString()); } } } @@ -604,12 +610,12 @@ void PayloadIface::copyOrMoveStrings(int field, StrHolder& dest, bool copy) { // direct payloadvalue manipulation for speed optimize if (!f.IsArray()) { auto str = *reinterpret_cast((v_->Ptr() + f.Offset())); - dest.emplace_back(reinterpret_cast(const_cast(str.getCxxstr())), copy); + dest.emplace_back(str.getBaseKeyString(), copy); } else { auto arr = reinterpret_cast(v_->Ptr() + f.Offset()); for (int 
i = 0; i < arr->len; i++) { auto str = *reinterpret_cast(v_->Ptr() + arr->offset + i * t_.Field(field).ElemSizeof()); - dest.emplace_back(reinterpret_cast(const_cast(str.getCxxstr())), copy); + dest.emplace_back(str.getBaseKeyString(), copy); } } } @@ -639,14 +645,21 @@ void PayloadIface::MoveStrings(int field, StringsHolder& dest) { template void PayloadIface::CopyStrings(std::vector& dest) { - for (auto field : t_.StrFields()) { + for (int field : t_.StrFields()) { + copyOrMoveStrings(field, dest, true); + } +} + +template +void PayloadIface::CopyStrings(h_vector& dest) { + for (int field : t_.StrFields()) { copyOrMoveStrings(field, dest, true); } } template void PayloadIface::ReleaseStrings() noexcept { - for (auto field : t_.StrFields()) { + for (int field : t_.StrFields()) { ReleaseStrings(field); } } diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index 68d1217df..8cb38635d 100644 --- a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -165,6 +165,7 @@ class PayloadIface { void ReleaseStrings(int field) noexcept; void MoveStrings(int field, StringsHolder& dest); void CopyStrings(std::vector& dest); + void CopyStrings(h_vector& dest); // Item values' string for printing std::string Dump() const; diff --git a/cpp_src/core/payload/payloadtype.cc b/cpp_src/core/payload/payloadtype.cc index 4ad5fda89..bace37809 100644 --- a/cpp_src/core/payload/payloadtype.cc +++ b/cpp_src/core/payload/payloadtype.cc @@ -151,7 +151,7 @@ int PayloadTypeImpl::FieldByJsonPath(std::string_view jsonPath) const noexcept { } void PayloadTypeImpl::serialize(WrSerializer& ser) const { - ser.PutVarUint(base_key_string::export_hdr_offset()); + ser.PutVarUint(key_string_impl::export_hdr_offset()); ser.PutVarUint(NumFields()); for (int i = 0; i < NumFields(); i++) { ser.PutKeyValueType(Field(i).Type()); @@ -191,9 +191,6 @@ void PayloadTypeImpl::deserialize(Serializer& ser) { PayloadFieldType ft(t, name, std::move(jsonPaths), 
isArray); - if (isArray) { - ft.SetArray(); - } ft.SetOffset(offset); fieldsByName_.emplace(std::move(name), fields_.size()); if (t.Is()) { diff --git a/cpp_src/core/payload/payloadvalue.cc b/cpp_src/core/payload/payloadvalue.cc index 03ce330f4..353013624 100644 --- a/cpp_src/core/payload/payloadvalue.cc +++ b/cpp_src/core/payload/payloadvalue.cc @@ -27,14 +27,6 @@ uint8_t* PayloadValue::alloc(size_t cap) { return pn; } -void PayloadValue::release() noexcept { - if (p_ && header()->refcount.fetch_sub(1, std::memory_order_acq_rel) == 1) { - header()->~dataHeader(); - operator delete(p_); - } - p_ = nullptr; -} - void PayloadValue::Clone(size_t size) { // If we have exclusive data - just up lsn if (p_ && header()->refcount.load(std::memory_order_acquire) == 1) { diff --git a/cpp_src/core/payload/payloadvalue.h b/cpp_src/core/payload/payloadvalue.h index a54b937a0..1ec46bb44 100644 --- a/cpp_src/core/payload/payloadvalue.h +++ b/cpp_src/core/payload/payloadvalue.h @@ -3,9 +3,12 @@ #include #include #include -#include "tools/assertrx.h" #include "tools/lsn.h" +#ifdef RX_WITH_STDLIB_DEBUG +#include "tools/assertrx.h" +#endif // RX_WITH_STDLIB_DEBUG + namespace reindexer { // The full item's payload object. It must be speed & size optimized @@ -15,7 +18,11 @@ class PayloadValue { struct dataHeader { dataHeader() noexcept : refcount(1), cap(0), lsn(-1) {} - ~dataHeader() { assertrx(refcount.load(std::memory_order_acquire) == 0); } +#ifdef RX_WITH_STDLIB_DEBUG + ~dataHeader() { assertrx_dbg(refcount.load(std::memory_order_acquire) == 0); } +#else // RX_WITH_STDLIB_DEBUG + ~dataHeader() = default; +#endif // RX_WITH_STDLIB_DEBUG refcounter refcount; unsigned cap; lsn_t lsn; @@ -61,12 +68,20 @@ class PayloadValue { lsn_t GetLSN() const noexcept { return p_ ? 
header()->lsn : lsn_t(); } bool IsFree() const noexcept { return bool(p_ == nullptr); } void Free() noexcept { release(); } - size_t GetCapacity() const noexcept { return header()->cap; } + size_t GetCapacity() const noexcept { return p_ ? header()->cap : 0; } const uint8_t* get() const noexcept { return p_; } protected: uint8_t* alloc(size_t cap); - void release() noexcept; + void release() noexcept { + if (p_) { + if (auto& hdr = *header(); hdr.refcount.fetch_sub(1, std::memory_order_acq_rel) == 1) { + hdr.~dataHeader(); + operator delete(p_); + } + p_ = nullptr; + } + } dataHeader* header() noexcept { return reinterpret_cast(p_); } const dataHeader* header() const noexcept { return reinterpret_cast(p_); } diff --git a/cpp_src/core/query/dsl/dslparser.cc b/cpp_src/core/query/dsl/dslparser.cc index af9240952..8e8340fa2 100644 --- a/cpp_src/core/query/dsl/dslparser.cc +++ b/cpp_src/core/query/dsl/dslparser.cc @@ -276,10 +276,9 @@ static void parseSort(const JsonValue& v, SortingEntries& sortingEntries, std::v } void parseSingleJoinQuery(const JsonValue& join, Query& query); -void parseEqualPositions(const JsonValue& dsl, std::vector>& equalPositions, size_t lastBracketPosition); +void parseEqualPositions(const JsonValue& dsl, Query& query); -static void parseFilter(const JsonValue& filter, Query& q, std::vector>& equalPositions, - size_t lastBracketPosition) { +static void parseFilter(const JsonValue& filter, Query& q) { OpType op = OpAnd; CondType condition{CondEq}; VariantArray values; @@ -333,16 +332,15 @@ static void parseFilter(const JsonValue& filter, Query& q, std::vector>& equalPositions, - size_t lastBracketPosition) { +void parseEqualPositions(const JsonValue& dsl, Query& query) { for (const auto& ar : dsl) { auto subArray = ar.value; checkJsonValueType(subArray, ar.key, JSON_OBJECT); @@ -582,11 +572,7 @@ void parseEqualPositions(const JsonValue& dsl, std::vector> equalPositions; for (const auto& elem : root) { auto& v = elem.value; auto name = elem.key; 
@@ -673,7 +657,7 @@ void parse(const JsonValue& root, Query& q) { case Root::Filters: checkJsonValueType(v, name, JSON_ARRAY); for (const auto& filter : v) { - parseFilter(filter.value, q, equalPositions, 0); + parseFilter(filter.value, q); } break; @@ -749,37 +733,29 @@ void parse(const JsonValue& root, Query& q) { break; } } - for (auto&& eqPos : equalPositions) { - if (eqPos.first == 0) { - q.SetEqualPositions(std::move(eqPos.second)); - } else { - q.SetEqualPositions(eqPos.first - 1, std::move(eqPos.second)); - } - } } #include "query.json.h" -Error Parse(std::string_view str, Query& q) { +void Parse(std::string_view str, Query& q) { static JsonSchemaChecker schemaChecker(kQueryJson, "query"); try { gason::JsonParser parser; auto root = parser.Parse(str); Error err = schemaChecker.Check(root); if (!err.ok()) { - return err; + throw err; } dsl::parse(root.value, q); } catch (const gason::Exception& ex) { - return Error(errParseJson, "Query: %s", ex.what()); + throw Error(errParseJson, "Query: %s", ex.what()); } catch (const Error& err) { - return err; + throw err; } catch (const std::exception& ex) { - return Error(errParseJson, "Exception: %s", ex.what()); + throw Error(errParseJson, "Exception: %s", ex.what()); } catch (...) 
{ - return Error(errParseJson, "Unknown Exception"); + throw Error(errParseJson, "Unknown Exception"); } - return errOK; } } // namespace dsl diff --git a/cpp_src/core/query/dsl/dslparser.h b/cpp_src/core/query/dsl/dslparser.h index 84311cfc7..bfc3af215 100644 --- a/cpp_src/core/query/dsl/dslparser.h +++ b/cpp_src/core/query/dsl/dslparser.h @@ -1,7 +1,6 @@ #pragma once #include -#include "tools/errors.h" namespace reindexer { @@ -9,7 +8,7 @@ class Query; namespace dsl { -Error Parse(std::string_view dsl, Query& q); +void Parse(std::string_view dsl, Query& q); } // namespace dsl } // namespace reindexer diff --git a/cpp_src/core/query/query.cc b/cpp_src/core/query/query.cc index 631ab0772..00e897928 100644 --- a/cpp_src/core/query/query.cc +++ b/cpp_src/core/query/query.cc @@ -10,9 +10,6 @@ namespace reindexer { using namespace std::string_view_literals; -const std::string_view kLsnIndexName = "#lsn"sv; -const std::string_view kSlaveVersionIndexName = "#slave_version"sv; - void Query::checkSubQuery() const { if rx_unlikely (type_ != QuerySelect) { throw Error{errQueryExec, "Subquery should be select"}; @@ -45,7 +42,7 @@ void Query::checkSubQuery() const { void Query::checkSubQueryNoData() const { if rx_unlikely (!aggregations_.empty()) { - throw Error{errQueryExec, "Aggregaton cannot be in subquery with condition Any or Empty"}; + throw Error{errQueryExec, "Aggregation cannot be in subquery with condition Any or Empty"}; } if rx_unlikely (HasLimit() && Limit() != 0) { throw Error{errQueryExec, "Limit cannot be in subquery with condition Any or Empty"}; @@ -121,7 +118,11 @@ bool JoinedQuery::operator==(const JoinedQuery& obj) const { } Query Query::FromSQL(std::string_view q) { return SQLParser::Parse(q); } -Error Query::FromJSON(std::string_view dsl) { return dsl::Parse(dsl, *this); } +Query Query::FromJSON(std::string_view dsl) { + Query q; + dsl::Parse(dsl, q); + return q; +} std::string Query::GetJSON() const { return dsl::toDsl(*this); } @@ -140,6 +141,20 @@ 
std::string Query::GetSQL(QueryType realType) const { return std::string(SQLEncoder(*this, realType).GetSQL(ser, false).Slice()); } +Query& Query::EqualPositions(EqualPosition_t&& ep) & { + if (ep.size() < 2) { + throw Error(errParams, "EqualPosition must have at least 2 field. Fields: [%s]", ep.size() == 1 ? ep[0] : ""); + } + QueryEntriesBracket* bracketPointer = entries_.LastOpenBracket(); + + if (bracketPointer == nullptr) { + entries_.equalPositions.emplace_back(std::move(ep)); + } else { + bracketPointer->equalPositions.emplace_back(std::move(ep)); + } + return *this; +} + void Query::Join(JoinedQuery&& jq) & { switch (jq.joinType) { case JoinType::Merge: @@ -175,7 +190,7 @@ void Query::checkSetObjectValue(const Variant& value) const { } } -VariantArray Query::deserializeValues(Serializer& ser, CondType cond) { +VariantArray Query::deserializeValues(Serializer& ser, CondType cond) const { VariantArray values; auto cnt = ser.GetVarUint(); if (cond == CondDWithin) { @@ -198,6 +213,8 @@ VariantArray Query::deserializeValues(Serializer& ser, CondType cond) { return values; } +void Query::deserializeJoinOn(Serializer&) { throw Error(errLogic, "Unexpected call. 
JoinOn actual only for JoinQuery"); } + void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { bool end = false; std::vector> equalPositions; @@ -252,7 +269,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { aggregations_.emplace_back(type, std::move(fields)); auto& ae = aggregations_.back(); while (!ser.Eof() && !aggEnd) { - int atype = ser.GetVarUint(); + auto atype = ser.GetVarUint(); switch (atype) { case QueryAggregationSort: { auto fieldName = ser.GetVString(); @@ -287,7 +304,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { if (sortingEntry.expression.length()) { sortingEntries_.push_back(std::move(sortingEntry)); } - int cnt = ser.GetVarUint(); + auto cnt = ser.GetVarUint(); if (cnt != 0 && sortingEntries_.size() != 1) { throw Error(errParams, "Forced sort order is allowed for the first sorting entry only"); } @@ -298,12 +315,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { break; } case QueryJoinOn: { - const OpType op = static_cast(ser.GetVarUint()); - const CondType condition = static_cast(ser.GetVarUint()); - std::string leftFieldName{ser.GetVString()}; - std::string rightFieldName{ser.GetVString()}; - reinterpret_cast(this)->joinEntries_.emplace_back(op, condition, std::move(leftFieldName), - std::move(rightFieldName)); + deserializeJoinOn(ser); break; } case QueryDebugLevel: @@ -353,7 +365,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { VariantArray val; std::string field(ser.GetVString()); bool isArray = ser.GetVarUint(); - int numValues = ser.GetVarUint(); + auto numValues = ser.GetVarUint(); bool hasExpressions = false; while (numValues--) { hasExpressions = ser.GetVarUint(); @@ -365,7 +377,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { case QueryUpdateField: { VariantArray val; std::string field(ser.GetVString()); - int numValues = ser.GetVarUint(); + auto numValues = ser.GetVarUint(); bool isArray = numValues > 1; 
bool hasExpressions = false; while (numValues--) { @@ -379,7 +391,7 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { VariantArray val; std::string field(ser.GetVString()); bool hasExpressions = false; - int numValues = ser.GetVarUint(); + auto numValues = ser.GetVarUint(); val.MarkArray(ser.GetVarUint() == 1); while (numValues--) { hasExpressions = ser.GetVarUint(); @@ -431,9 +443,10 @@ void Query::deserialize(Serializer& ser, bool& hasJoinConditions) { entries_.Get(eqPos.first - 1).equalPositions.emplace_back(std::move(eqPos.second)); } } - return; } +void Query::serializeJoinEntries(WrSerializer&) const { throw Error(errLogic, "Unexpected call. JoinEntries actual only for JoinQuery"); } + void Query::Serialize(WrSerializer& ser, uint8_t mode) const { ser.PutVString(NsName()); entries_.Serialize(ser, subQueries_); @@ -481,13 +494,7 @@ void Query::Serialize(WrSerializer& ser, uint8_t mode) const { } if (mode & WithJoinEntries) { - for (const auto& qje : reinterpret_cast(this)->joinEntries_) { - ser.PutVarUint(QueryJoinOn); - ser.PutVarUint(qje.Operation()); - ser.PutVarUint(qje.Condition()); - ser.PutVString(qje.LeftFieldName()); - ser.PutVString(qje.RightFieldName()); - } + serializeJoinEntries(ser); } for (const auto& equalPoses : entries_.equalPositions) { @@ -746,6 +753,9 @@ void Query::WalkNested(bool withSelf, bool withMerged, bool withSubQueries, cons } bool Query::IsWALQuery() const noexcept { + constexpr static std::string_view kLsnIndexName = "#lsn"sv; + constexpr static std::string_view kSlaveVersionIndexName = "#slave_version"sv; + if (entries_.Size() == 1 && entries_.Is(0) && kLsnIndexName == entries_.Get(0).FieldName()) { return true; } else if (entries_.Size() == 2 && entries_.Is(0) && entries_.Is(1)) { @@ -757,4 +767,22 @@ bool Query::IsWALQuery() const noexcept { return false; } +void JoinedQuery::deserializeJoinOn(Serializer& ser) { + const OpType op = static_cast(ser.GetVarUint()); + const CondType condition = 
static_cast(ser.GetVarUint()); + std::string leftFieldName{ser.GetVString()}; + std::string rightFieldName{ser.GetVString()}; + joinEntries_.emplace_back(op, condition, std::move(leftFieldName), std::move(rightFieldName)); +} + +void JoinedQuery::serializeJoinEntries(WrSerializer& ser) const { + for (const auto& qje : joinEntries_) { + ser.PutVarUint(QueryJoinOn); + ser.PutVarUint(qje.Operation()); + ser.PutVarUint(qje.Condition()); + ser.PutVString(qje.LeftFieldName()); + ser.PutVString(qje.RightFieldName()); + } +} + } // namespace reindexer diff --git a/cpp_src/core/query/query.h b/cpp_src/core/query/query.h index 50bbce6b3..a084980cc 100644 --- a/cpp_src/core/query/query.h +++ b/cpp_src/core/query/query.h @@ -34,6 +34,12 @@ class Query { : namespace_(std::forward(nsName)), start_(start), count_(count), calcTotal_(calcTotal) {} Query() = default; + virtual ~Query() = default; + + Query(Query&& other) noexcept = default; + Query& operator=(Query&& other) noexcept = default; + Query(const Query& other) = default; + Query& operator=(const Query& other) = delete; /// Allows to compare 2 Query objects. [[nodiscard]] bool operator==(const Query&) const; @@ -63,10 +69,10 @@ class Query { /// @return Query in SQL format [[nodiscard]] std::string GetSQL(QueryType realType) const; - /// Parses JSON dsl set. + /// Parses JSON dsl set. Throws Error-exption on errors /// @param dsl - dsl set. - /// @return always returns errOk or throws an exception. 
- Error FromJSON(std::string_view dsl); + /// @return Result query + static Query FromJSON(std::string_view dsl); /// returns structure of a query in JSON dsl format [[nodiscard]] std::string GetJSON() const; @@ -880,10 +886,10 @@ class Query { entries_.SetValue(i, T{std::forward(args)...}); } void UpdateField(UpdateEntry&& ue) & { updateFields_.emplace_back(std::move(ue)); } - void SetEqualPositions(EqualPosition_t&& ep) & { entries_.equalPositions.emplace_back(std::move(ep)); } - void SetEqualPositions(size_t bracketPosition, EqualPosition_t&& ep) & { - entries_.Get(bracketPosition).equalPositions.emplace_back(std::move(ep)); - } + + Query& EqualPositions(EqualPosition_t&& ep) &; + [[nodiscard]] Query&& EqualPositions(EqualPosition_t&& ep) && { return std::move(EqualPositions(std::move(ep))); } + void Join(JoinedQuery&&) &; void ReserveQueryEntries(size_t s) & { entries_.Reserve(s); } template @@ -977,8 +983,10 @@ class Query { using OnHelperR = OnHelperTempl; void checkSetObjectValue(const Variant& value) const; + virtual void deserializeJoinOn(Serializer& ser); void deserialize(Serializer& ser, bool& hasJoinConditions); - VariantArray deserializeValues(Serializer&, CondType); + VariantArray deserializeValues(Serializer&, CondType) const; + virtual void serializeJoinEntries(WrSerializer& ser) const; void checkSubQueryNoData() const; void checkSubQueryWithData() const; void checkSubQuery() const; @@ -1004,7 +1012,7 @@ class Query { OpType nextOp_ = OpAnd; /// Next operation constant. }; -class JoinedQuery : public Query { +class JoinedQuery final : public Query { public: JoinedQuery(JoinType jt, const Query& q) : Query(q), joinType{jt} {} JoinedQuery(JoinType jt, Query&& q) : Query(std::move(q)), joinType{jt} {} @@ -1014,6 +1022,10 @@ class JoinedQuery : public Query { JoinType joinType{JoinType::LeftJoin}; /// Default join type. h_vector joinEntries_; /// Condition for join. 
Filled in each subqueries, empty in root query + +private: + void deserializeJoinOn(Serializer& ser) override; + void serializeJoinEntries(WrSerializer& ser) const override; }; template diff --git a/cpp_src/core/query/sql/sqlparser.cc b/cpp_src/core/query/sql/sqlparser.cc index a9921ffaf..6497731f6 100644 --- a/cpp_src/core/query/sql/sqlparser.cc +++ b/cpp_src/core/query/sql/sqlparser.cc @@ -1,12 +1,10 @@ #include "sqlparser.h" #include "core/keyvalue/geometry.h" -#include "core/keyvalue/key_string.h" #include "core/query/query.h" #include "core/queryresults/aggregationresult.h" #include "core/type_consts_helpers.h" #include "sqltokentype.h" #include "tools/stringstools.h" -#include "vendor/double-conversion/double-conversion.h" #include "vendor/gason/gason.h" namespace reindexer { @@ -34,7 +32,8 @@ token SQLParser::peekSqlToken(tokenizer& parser, SqlTokenType tokenType, bool to tokenLen = ctx_.suggestionsPos - parser.getPos() + 1; } if (!ctx_.foundPossibleSuggestions || tokenLen) { - ctx_.suggestions.emplace_back(std::string(tok.text().data(), tokenLen), tokenType); + // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage) + ctx_.suggestions.emplace_back(std::string(tok.text().data(), std::min(tok.text().size(), tokenLen)), tokenType); ctx_.foundPossibleSuggestions = true; ctx_.possibleSuggestionDetectedInThisClause = true; } @@ -818,19 +817,32 @@ int SQLParser::parseWhere(tokenizer& parser) { nextOp = OpNot; parser.next_token(); } - std::vector> equalPositions; - size_t lastBracketPosition = 0; int openBracketsCount = 0; + bool expectSecondLogicalOperand = false; + auto throwIfExpectSecondLogicalOperand = [&tok, &parser, &expectSecondLogicalOperand]() { + if (expectSecondLogicalOperand) { + throw Error(errParseSQL, "Expected second logical operand, but found '%s' in query '%s'", tok.text(), parser.where()); + } + }; while (!parser.end()) { tok = peekSqlToken(parser, nested == Nested::Yes ? 
NestedWhereFieldSqlToken : WhereFieldSqlToken, false); parser.next_token(tokenizer::flags::no_flags); + while (tok.type == TokenName && iequals(tok.text(), "equal_position"sv)) { + parseEqualPositions(parser); + tok = peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldSqlToken, false); + parser.next_token(tokenizer::flags::no_flags); + } + expectSecondLogicalOperand = false; if (tok.text() == "("sv) { tok = peekSqlToken(parser, nested == Nested::Yes ? NestedWhereFieldSqlToken : WhereFieldOrSubquerySqlToken, false); - if (nested == Nested::Yes || !iequals(tok.text(), "select"sv) || isCondition(parser.peek_second_token().text())) { + // isCondition(parser.peek_second_token().text() to distinguish the token type operator 'select' or field with name 'select' + if (nested == Nested::No && iequals(tok.text(), "select"sv) && !isCondition(parser.peek_second_token().text())) { + parseWhereCondition(parser, parseSubQuery(parser), nextOp); + nextOp = OpAnd; + } else { query_.NextOp(nextOp); query_.OpenBracket(); ++openBracketsCount; - lastBracketPosition = query_.Entries().Size(); if (iequals(tok.text(), "not"sv)) { nextOp = OpNot; parser.next_token(); @@ -839,8 +851,6 @@ int SQLParser::parseWhere(tokenizer& parser) { } continue; } - parseWhereCondition(parser, parseSubQuery(parser), nextOp); - nextOp = OpAnd; } else if (tok.type == TokenName) { if (iequals(tok.text(), "st_dwithin"sv)) { parseDWithin(parser, nextOp); @@ -848,6 +858,7 @@ int SQLParser::parseWhere(tokenizer& parser) { } else if constexpr (nested == Nested::No) { if (iequals(tok.text(), "join"sv)) { parseJoin(JoinType::LeftJoin, parser); + } else if (iequals(tok.text(), "left"sv)) { peekSqlToken(parser, LeftSqlToken); if (parser.next_token().text() != "join"sv) { @@ -873,25 +884,35 @@ int SQLParser::parseWhere(tokenizer& parser) { } else if (tok.type == TokenNumber || tok.type == TokenString) { throw Error(errParseSQL, "%s is invalid at this location. 
(text = '%s' location = %s)", tok.type == TokenNumber ? "Number" : "String", tok.text(), parser.where()); + } else { + expectSecondLogicalOperand = true; } tok = parser.peek_token(); while (tok.text() == "equal_position"sv) { - parseEqualPositions(parser, equalPositions, lastBracketPosition); + parser.next_token(); + parseEqualPositions(parser); tok = parser.peek_token(); } while (openBracketsCount > 0 && tok.text() == ")"sv) { + throwIfExpectSecondLogicalOperand(); query_.CloseBracket(); --openBracketsCount; parser.next_token(); tok = parser.peek_token(); + while (tok.text() == "equal_position"sv) { + parser.next_token(); + parseEqualPositions(parser); + tok = parser.peek_token(); + } } tok = peekSqlToken(parser, WhereOpSqlToken, false); - if (iequals(tok.text(), "and"sv)) { + throwIfExpectSecondLogicalOperand(); nextOp = OpAnd; + expectSecondLogicalOperand = true; parser.next_token(); tok = peekSqlToken(parser, nested == Nested::Yes ? NestedAndSqlToken : AndSqlToken, false); if (iequals(tok.text(), "not"sv)) { @@ -901,55 +922,36 @@ int SQLParser::parseWhere(tokenizer& parser) { continue; } } else if (iequals(tok.text(), "or"sv)) { + throwIfExpectSecondLogicalOperand(); parser.next_token(); peekSqlToken(parser, FieldNameSqlToken); nextOp = OpOr; + expectSecondLogicalOperand = true; } else if (!iequals(tok.text(), "join"sv) && !iequals(tok.text(), "inner"sv) && !iequals(tok.text(), "left"sv)) { break; } } - for (auto& eqPos : equalPositions) { - if (eqPos.first == 0) { - query_.SetEqualPositions(std::move(eqPos.second)); - } else { - query_.SetEqualPositions(eqPos.first - 1, std::move(eqPos.second)); - } - } + throwIfExpectSecondLogicalOperand(); if (query_.Entries().Empty()) { throw Error(errParseSQL, "Expected condition after 'WHERE'"); } - return 0; } -void SQLParser::parseEqualPositions(tokenizer& parser, std::vector>& equalPositions, - size_t lastBracketPosition) { - parser.next_token(); +void SQLParser::parseEqualPositions(tokenizer& parser) { auto tok = 
parser.next_token(); if (tok.text() != "("sv) { throw Error(errParseSQL, "Expected '(', but found '%s', %s", tok.text(), parser.where()); } - EqualPosition_t fields; + EqualPosition_t fieldNames; for (;;) { auto nameWithCase = peekSqlToken(parser, FieldNameSqlToken); tok = parser.next_token(tokenizer::flags::no_flags); if (tok.type != TokenName) { throw Error(errParseSQL, "Expected name, but found '%s' in query, %s", tok.text(), parser.where()); } - bool validField = false; - for (auto it = query_.Entries().begin_of_current_bracket(); it != query_.Entries().end(); ++it) { - if (it->Is() && nameWithCase.text() == it->Value().FieldName()) { - validField = true; - break; - } - } - if (!validField) { - throw Error(errParseSQL, - "Only fields that present in 'Where' condition are allowed to use in equal_position(), but found '%s' in query, %s", - nameWithCase.text(), parser.where()); - } - fields.emplace_back(nameWithCase.text()); + fieldNames.emplace_back(nameWithCase.text()); tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() == ")"sv) { break; @@ -958,11 +960,7 @@ void SQLParser::parseEqualPositions(tokenizer& parser, std::vector>& equalPositions, size_t openBracketsCount); + void parseEqualPositions(tokenizer& parser); Point parseGeomFromText(tokenizer& parser) const; void parseDWithin(tokenizer& parser, OpType nextOp); diff --git a/cpp_src/core/querycache.h b/cpp_src/core/querycache.h index 566015220..6f323b29b 100644 --- a/cpp_src/core/querycache.h +++ b/cpp_src/core/querycache.h @@ -10,11 +10,12 @@ namespace reindexer { struct QueryCountCacheVal { QueryCountCacheVal() = default; - QueryCountCacheVal(size_t total) noexcept : total_count(total) {} + QueryCountCacheVal(size_t total) noexcept : totalCount(total) {} size_t Size() const noexcept { return 0; } + bool IsInitialized() const noexcept { return totalCount >= 0; } - int total_count = -1; + int totalCount = -1; }; constexpr uint8_t kCountCachedKeyMode = @@ -67,8 +68,6 @@ struct 
HashQueryCacheKey { } }; -using QueryCountCache = LRUCache; - -; +using QueryCountCache = LRUCache, LRUWithAtomicPtr::No>; } // namespace reindexer diff --git a/cpp_src/core/queryresults/itemref.h b/cpp_src/core/queryresults/itemref.h index 6ddc37642..fa2e346ad 100644 --- a/cpp_src/core/queryresults/itemref.h +++ b/cpp_src/core/queryresults/itemref.h @@ -3,18 +3,19 @@ #include "core/payload/payloadvalue.h" #include "core/type_consts.h" #include "estl/h_vector.h" +#include "tools/assertrx.h" namespace reindexer { static const int kDefaultQueryResultsSize = 32; class ItemRef { public: - ItemRef() : id_(0), proc_(0), raw_(0), valueInitialized_(false), nsid_(0) {} - ItemRef(IdType id, const PayloadValue& value, uint16_t proc = 0, uint16_t nsid = 0, bool raw = false) + ItemRef() noexcept : id_(0), proc_(0), raw_(0), valueInitialized_(false), nsid_(0) {} + ItemRef(IdType id, const PayloadValue& value, uint16_t proc = 0, uint16_t nsid = 0, bool raw = false) noexcept : id_(id), proc_(proc), raw_(raw), valueInitialized_(true), nsid_(nsid), value_(value) {} - ItemRef(IdType id, unsigned sortExprResultsIdx, uint16_t proc = 0, uint16_t nsid = 0) + ItemRef(IdType id, unsigned sortExprResultsIdx, uint16_t proc = 0, uint16_t nsid = 0) noexcept : id_(id), proc_(proc), raw_(0), valueInitialized_(false), nsid_(nsid), sortExprResultsIdx_(sortExprResultsIdx) {} - ItemRef(ItemRef&& other) + ItemRef(ItemRef&& other) noexcept : id_(other.id_), proc_(other.proc_), raw_(other.raw_), @@ -25,7 +26,7 @@ class ItemRef { new (&value_) PayloadValue(std::move(other.value_)); } } - ItemRef(const ItemRef& other) + ItemRef(const ItemRef& other) noexcept : id_(other.id_), proc_(other.proc_), raw_(other.raw_), @@ -36,7 +37,7 @@ class ItemRef { new (&value_) PayloadValue(other.value_); } } - ItemRef& operator=(ItemRef&& other) { + ItemRef& operator=(ItemRef&& other) noexcept { if (&other == this) { return *this; } @@ -61,7 +62,7 @@ class ItemRef { valueInitialized_ = other.valueInitialized_; return 
*this; } - ItemRef& operator=(const ItemRef& other) { + ItemRef& operator=(const ItemRef& other) noexcept { if (&other == this) { return *this; } diff --git a/cpp_src/core/queryresults/localqueryresults.cc b/cpp_src/core/queryresults/localqueryresults.cc index 6c6ee045c..412d90e87 100644 --- a/cpp_src/core/queryresults/localqueryresults.cc +++ b/cpp_src/core/queryresults/localqueryresults.cc @@ -75,10 +75,6 @@ LocalQueryResults::~LocalQueryResults() = default; void LocalQueryResults::Clear() { *this = LocalQueryResults(); } -void LocalQueryResults::Erase(ItemRefVector::iterator start, ItemRefVector::iterator finish) { items_.erase(start, finish); } - -void LocalQueryResults::Add(const ItemRef& i) { items_.push_back(i); } - // Used to save strings when converting the client result to the server. // The server item is created, inserted into the result and deleted // so that the rows are not deleted, they are saved in the results. @@ -426,10 +422,10 @@ Item LocalQueryResults::Iterator::GetItem(bool enableHold) { auto item = Item(new ItemImpl(ctx.type_, itemRef.Value(), ctx.tagsMatcher_, ctx.schema_)); item.impl_->payloadValue_.Clone(); if (enableHold) { - if (!item.impl_->keyStringsHolder_) { - item.impl_->keyStringsHolder_.reset(new std::vector); + if (!item.impl_->holder_) { + item.impl_->holder_ = std::make_unique(); } - Payload{ctx.type_, item.impl_->payloadValue_}.CopyStrings(*(item.impl_->keyStringsHolder_)); + Payload{ctx.type_, item.impl_->payloadValue_}.CopyStrings(*(item.impl_->holder_)); } item.setID(itemRef.Id()); @@ -444,14 +440,19 @@ void LocalQueryResults::AddItem(Item& item, bool withData, bool enableHold) { ctxs.emplace_back(ritem->Type(), ritem->tagsMatcher(), FieldsSet(), ritem->GetSchema(), ns ? ns->ns_->incarnationTag_ : lsn_t()); } - Add(ItemRef(item.GetID(), withData ? (ritem->RealValue().IsFree() ? 
ritem->Value() : ritem->RealValue()) : PayloadValue())); - if (withData && enableHold) { - if (ns) { - Payload{ns->ns_->payloadType_, items_.back().Value()}.CopyStrings(stringsHolder_); - } else { - assertrx(ctxs.size() == 1); - Payload{ctxs.back().type_, items_.back().Value()}.CopyStrings(stringsHolder_); + if (withData) { + auto& value = ritem->RealValue().IsFree() ? ritem->Value() : ritem->RealValue(); + AddItemRef(item.GetID(), value); + if (enableHold) { + if (auto ns{ritem->GetNamespace()}; ns) { + ConstPayload{ns->ns_->payloadType_, value}.CopyStrings(stringsHolder_); + } else { + assertrx(ctxs.size() == 1); + ConstPayload{ctxs.back().type_, value}.CopyStrings(stringsHolder_); + } } + } else { + AddItemRef(item.GetID(), PayloadValue()); } } } diff --git a/cpp_src/core/queryresults/localqueryresults.h b/cpp_src/core/queryresults/localqueryresults.h index 66cbcfb37..c4d580c76 100644 --- a/cpp_src/core/queryresults/localqueryresults.h +++ b/cpp_src/core/queryresults/localqueryresults.h @@ -44,13 +44,16 @@ class LocalQueryResults { ~LocalQueryResults(); LocalQueryResults& operator=(const LocalQueryResults&) = delete; LocalQueryResults& operator=(LocalQueryResults&& obj) noexcept; - void Add(const ItemRef&); + template + void AddItemRef(Args&&... 
args) { + items_.emplace_back(std::forward(args)...); + } // use enableHold = false only if you are sure that the LocalQueryResults will be destroyed before the item // or if data from the item are contained in namespace added to the LocalQueryResults // enableHold is ignored when withData = false void AddItem(Item& item, bool withData = false, bool enableHold = true); std::string Dump() const; - void Erase(ItemRefVector::iterator begin, ItemRefVector::iterator end); + void Erase(ItemRefVector::const_iterator begin, ItemRefVector::const_iterator end) { items_.erase(begin, end); } size_t Count() const noexcept { return items_.size(); } size_t TotalCount() const noexcept { return totalCount; } const std::string& GetExplainResults() const& noexcept { return explainResults; } @@ -96,6 +99,10 @@ class LocalQueryResults { bool operator!=(const Iterator& other) const noexcept { return idx_ != other.idx_; } bool operator==(const Iterator& other) const noexcept { return idx_ == other.idx_; } Iterator& operator*() noexcept { return *this; } + static Iterator SwitchQueryResultsPtrUnsafe(Iterator&& it, const LocalQueryResults& qr) { + it.qr_ = &qr; + return std::move(it); + } const LocalQueryResults* qr_; int idx_; diff --git a/cpp_src/core/queryresults/queryresults.cc b/cpp_src/core/queryresults/queryresults.cc index 12dbad9a7..dc51844ef 100644 --- a/cpp_src/core/queryresults/queryresults.cc +++ b/cpp_src/core/queryresults/queryresults.cc @@ -90,7 +90,7 @@ void QueryResults::AddQr(LocalQueryResults&& local, int shardID, bool buildMerge if (NeedOutputShardId()) { local.SetOutputShardId(shardID); } - local_ = std::make_unique>(std::move(local)); + local_.emplace(std::move(local)); local_->shardID = shardID; switch (type_) { case Type::None: @@ -121,19 +121,24 @@ void QueryResults::AddQr(client::QueryResults&& remote, int shardID, bool buildM } if (type_ == Type::None || remote.Count() != 0 || remote.TotalCount() != 0 || !remote.GetAggregationResults().empty()) { begin_.it = 
std::nullopt; - remote_.emplace_back(std::move(remote)); - remote_.back().shardID = shardID; + if (remote_.empty()) { + remote_.reserve(16u); + } + remote_.emplace_back(std::make_unique>(std::move(remote))); + remote_.back()->shardID = shardID; switch (type_) { case Type::None: type_ = Type::SingleRemote; - remote_[0].hasCompatibleTm = true; + remote_[0]->hasCompatibleTm = true; break; case Type::SingleRemote: type_ = Type::MultipleRemote; - remote_[0].hasCompatibleTm = false; + remote_[0]->hasCompatibleTm = false; break; case Type::Local: type_ = Type::Mixed; + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) local_->hasCompatibleTm = false; break; case Type::MultipleRemote: @@ -152,6 +157,7 @@ void QueryResults::RebuildMergedData() { mergedData_.reset(); if (type_ == Type::Mixed) { assertrx(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) const auto nss = local_->qr.GetNamespaces(); if (nss.size() > 1) { throw Error(errLogic, "Local query result has %d namespaces, but distributed query results may have only 1", nss.size()); @@ -170,14 +176,14 @@ void QueryResults::RebuildMergedData() { assertrx(remote_.size()); for (auto& qrp : remote_) { - const auto nss = qrp.qr.GetNamespaces(); - const auto& agg = qrp.qr.GetAggregationResults(); + const auto nss = qrp->qr.GetNamespaces(); + const auto& agg = qrp->qr.GetAggregationResults(); if (mergedData_) { if (!iequals(mergedData_->pt.Name(), nss[0])) { throw Error(errLogic, "Query results in distributed query have different ns names: '%s' vs '%s'", mergedData_->pt.Name(), nss[0]); } - if (mergedData_->haveRank != qrp.qr.HaveRank() || mergedData_->needOutputRank != qrp.qr.NeedOutputRank()) { + if (mergedData_->haveRank != qrp->qr.HaveRank() || mergedData_->needOutputRank != qrp->qr.NeedOutputRank()) { throw Error(errLogic, "Rank options are incompatible between query results inside distributed query results"); } if (mergedData_->aggregationResults.size() != agg.size()) { @@ 
-221,7 +227,7 @@ void QueryResults::RebuildMergedData() { } } } else { - mergedData_ = std::make_unique(std::string(nss[0]), qrp.qr.HaveRank(), qrp.qr.NeedOutputRank()); + mergedData_ = std::make_unique(std::string(nss[0]), qrp->qr.HaveRank(), qrp->qr.NeedOutputRank()); for (const auto& a : agg) { if (a.type == AggAvg || a.type == AggFacet || a.type == AggDistinct || a.type == AggUnknown) { throw Error(errLogic, "Remote query result (within distributed results) has unsupported aggregations"); @@ -239,7 +245,7 @@ void QueryResults::RebuildMergedData() { mergedData_->pt = local_->qr.getPayloadType(0); } for (auto& qrp : remote_) { - tmList.emplace_back(qrp.qr.GetTagsMatcher(0)); + tmList.emplace_back(qrp->qr.GetTagsMatcher(0)); } mergedData_->tm = TagsMatcher::CreateMergedTagsMatcher(tmList); @@ -247,7 +253,7 @@ void QueryResults::RebuildMergedData() { local_->hasCompatibleTm = local_->qr.getTagsMatcher(0).IsSubsetOf(mergedData_->tm); } for (auto& qrp : remote_) { - qrp.hasCompatibleTm = qrp.qr.GetTagsMatcher(0).IsSubsetOf(mergedData_->tm); + qrp->hasCompatibleTm = qrp->qr.GetTagsMatcher(0).IsSubsetOf(mergedData_->tm); } } catch (...) 
{ mergedData_.reset(); @@ -264,10 +270,12 @@ const std::vector& QueryResults::GetAggregationResults() & { return kEmpty; } case Type::Local: { + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.GetAggregationResults(); } case Type::SingleRemote: { - return remote_[0].qr.GetAggregationResults(); + return remote_[0]->qr.GetAggregationResults(); } case Type::MultipleRemote: case Type::Mixed: @@ -281,9 +289,11 @@ h_vector QueryResults::GetNamespaces() const { case Type::None: return h_vector(); case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.GetNamespaces(); case Type::SingleRemote: - return remote_[0].qr.GetNamespaces(); + return remote_[0]->qr.GetNamespaces(); case Type::MultipleRemote: case Type::Mixed: default: @@ -296,19 +306,22 @@ bool QueryResults::IsCacheEnabled() const noexcept { case Type::None: return true; case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.IsCacheEnabled(); case Type::SingleRemote: case Type::MultipleRemote: case Type::Mixed: default: { - bool res = true; - if (local_) { - res = res && local_->qr.IsCacheEnabled(); + if (local_ && !local_->qr.IsCacheEnabled()) { + return false; } for (auto& qrp : remote_) { - res = res && qrp.qr.IsCacheEnabled(); + if (!qrp->qr.IsCacheEnabled()) { + return false; + } } - return res; + return true; } } } @@ -318,7 +331,7 @@ bool QueryResults::HaveShardIDs() const noexcept { return true; } for (auto& qrp : remote_) { - if (qrp.shardID != ShardingKeyType::ProxyOff) { + if (qrp->shardID != ShardingKeyType::ProxyOff) { return true; } } @@ -330,9 +343,11 @@ int QueryResults::GetCommonShardID() const { case Type::None: return -1; case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->shardID; case Type::SingleRemote: - return remote_[0].shardID; + return remote_[0]->shardID; case 
Type::MultipleRemote: case Type::Mixed: break; @@ -343,11 +358,11 @@ int QueryResults::GetCommonShardID() const { } for (auto& qrp : remote_) { if (shardId.has_value()) { - if (qrp.shardID != *shardId) { - throw Error(errLogic, "Distributed query results does not have common shard id (%d vs %d)", qrp.shardID, *shardId); + if (qrp->shardID != *shardId) { + throw Error(errLogic, "Distributed query results does not have common shard id (%d vs %d)", qrp->shardID, *shardId); } } else { - shardId = qrp.shardID; + shardId = qrp->shardID; } } return shardId.has_value() ? *shardId : ShardingKeyType::ProxyOff; @@ -358,9 +373,11 @@ PayloadType QueryResults::GetPayloadType(int nsid) const noexcept { case Type::None: return PayloadType(); case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.getPayloadType(nsid); case Type::SingleRemote: - return remote_[0].qr.GetPayloadType(nsid); + return remote_[0]->qr.GetPayloadType(nsid); case Type::MultipleRemote: case Type::Mixed: default: @@ -373,9 +390,11 @@ TagsMatcher QueryResults::GetTagsMatcher(int nsid) const noexcept { case Type::None: return TagsMatcher(); case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.getTagsMatcher(nsid); case Type::SingleRemote: - return remote_[0].qr.GetTagsMatcher(nsid); + return remote_[0]->qr.GetTagsMatcher(nsid); case Type::MultipleRemote: case Type::Mixed: default: @@ -388,9 +407,11 @@ bool QueryResults::HaveRank() const noexcept { case Type::None: return false; case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.haveRank; case Type::SingleRemote: - return remote_[0].qr.HaveRank(); + return remote_[0]->qr.HaveRank(); case Type::MultipleRemote: case Type::Mixed: break; @@ -403,9 +424,11 @@ bool QueryResults::NeedOutputRank() const noexcept { case Type::None: return false; case Type::Local: + assertrx_dbg(local_); 
+ // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.needOutputRank; case Type::SingleRemote: - return remote_[0].qr.NeedOutputRank(); + return remote_[0]->qr.NeedOutputRank(); case Type::MultipleRemote: case Type::Mixed: break; @@ -418,9 +441,11 @@ bool QueryResults::HaveJoined() const noexcept { case Type::None: return false; case Type::Local: + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return local_->qr.joined_.size(); case Type::SingleRemote: - return remote_[0].qr.HaveJoined(); + return remote_[0]->qr.HaveJoined(); case Type::MultipleRemote: case Type::Mixed: break; @@ -457,8 +482,12 @@ uint32_t QueryResults::GetJoinedField(int parentNsId) const noexcept { joinedField += qData_->mergedJoinedSizes[ns]; } } else if (type_ == Type::Local) { + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) joinedField = local_->qr.joined_.size(); for (int ns = 0; ns < parentNsId; ++ns) { + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) joinedField += local_->qr.joined_[size_t(ns)].GetJoinedSelectorsCount(); } } @@ -482,6 +511,7 @@ Error QueryResults::Iterator::GetCJSON(WrSerializer& wrser, bool withHdrLen) { case Type::None: return Error(errLogic, "QueryResults are empty"); case Type::Local: + assertrx_dbg(qr_->local_); // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return localIt_->GetCJSON(wrser, withHdrLen); case Type::SingleRemote: @@ -490,19 +520,22 @@ Error QueryResults::Iterator::GetCJSON(WrSerializer& wrser, bool withHdrLen) { break; } - Error err = std::visit(overloaded{[&](LocalQueryResults::Iterator it) { - if (qr_->local_->hasCompatibleTm) { - return it.GetCJSON(wrser, withHdrLen); - } - return getCJSONviaJSON(wrser, withHdrLen, it); - }, - [&](client::QueryResults::Iterator it) { - if (qr_->type_ == Type::SingleRemote || qr_->remote_[size_t(qr_->curQrId_)].hasCompatibleTm) { - return it.GetCJSON(wrser, withHdrLen); - } - return 
getCJSONviaJSON(wrser, withHdrLen, it); - }}, - getVariantIt()); + Error err = + std::visit(overloaded{[&](LocalQueryResults::Iterator it) { + assertrx_dbg(qr_->local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + if (qr_->local_->hasCompatibleTm) { + return it.GetCJSON(wrser, withHdrLen); + } + return getCJSONviaJSON(wrser, withHdrLen, it); + }, + [&](client::QueryResults::Iterator it) { + if (qr_->type_ == Type::SingleRemote || qr_->remote_[size_t(qr_->curQrId_)]->hasCompatibleTm) { + return it.GetCJSON(wrser, withHdrLen); + } + return getCJSONviaJSON(wrser, withHdrLen, it); + }}, + getVariantIt()); return err; } catch (Error& e) { return e; @@ -536,6 +569,7 @@ Item QueryResults::Iterator::GetItem(bool enableHold) { case Type::None: return Item(); case Type::Local: + assertrx_dbg(qr_->local_); // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return localIt_->GetItem(enableHold); case Type::SingleRemote: @@ -550,12 +584,14 @@ Item QueryResults::Iterator::GetItem(bool enableHold) { auto& mData = qr_->getMergedData(); itemImpl.reset(new ItemImpl(mData.pt, mData.tm)); } else { - auto& remoteQr = qr_->remote_[size_t(qr_->curQrId_)].qr; + auto& remoteQr = qr_->remote_[size_t(qr_->curQrId_)]->qr; const int nsId = std::get(vit).GetNSID(); itemImpl.reset(new ItemImpl(remoteQr.GetPayloadType(nsId), remoteQr.GetTagsMatcher(nsId))); } Item item = std::visit(overloaded{[&](LocalQueryResults::Iterator& it) { + assertrx_dbg(qr_->local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) auto item = getItem(it, std::move(itemImpl), !qr_->local_->hasCompatibleTm); item.setID(it.GetItemRef().Id()); item.setLSN(it.GetItemRef().Value().GetLSN()); @@ -563,9 +599,9 @@ Item QueryResults::Iterator::GetItem(bool enableHold) { return item; }, [&](client::QueryResults::Iterator& it) { - auto item = getItem(it, std::move(itemImpl), - !qr_->remote_[size_t(qr_->curQrId_)].hasCompatibleTm || - !qr_->remote_[size_t(qr_->curQrId_)].qr.IsCJSON()); + auto& remoteQr = 
*qr_->remote_[size_t(qr_->curQrId_)]; + auto item = + getItem(it, std::move(itemImpl), !remoteQr.hasCompatibleTm || !remoteQr.qr.IsCJSON()); item.setID(it.GetID()); assertrx(!it.GetLSN().isEmpty()); item.setLSN(it.GetLSN()); @@ -598,12 +634,13 @@ void QueryResults::QrMetaData::ResetJoinStorage(int64_t idx) const { joins::ItemIterator QueryResults::Iterator::GetJoined(std::vector* storage) { if (qr_->type_ == Type::Local) { + assertrx_dbg(localIt_); // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return localIt_->GetJoined(); } else if (qr_->type_ == Type::SingleRemote) { validateProxiedIterator(); - auto rit = qr_->remote_[0].it; + auto rit = qr_->remote_[0]->it; const auto& joinedData = rit.GetJoined(); if (!joinedData.size()) { return joins::ItemIterator::CreateEmpty(); @@ -612,7 +649,7 @@ joins::ItemIterator QueryResults::Iterator::GetJoined(std::vector* throw Error(errLogic, "Unable to init joined data without initial query"); } - auto& rqr = qr_->remote_[0]; + auto& rqr = *qr_->remote_[0]; if (storage || !rqr.CheckIfNsJoinStorageHasSameIdx(idx_)) { try { rqr.ResetJoinStorage(idx_); @@ -628,10 +665,10 @@ joins::ItemIterator QueryResults::Iterator::GetJoined(std::vector* LocalQueryResults qrJoined; const auto& joinedItems = joinedData[i]; for (const auto& itemData : joinedItems) { - ItemImpl itemimpl(qr_->remote_[0].qr.GetPayloadType(jField), qr_->remote_[0].qr.GetTagsMatcher(jField)); + ItemImpl itemimpl(rqr.qr.GetPayloadType(jField), rqr.qr.GetTagsMatcher(jField)); itemimpl.FromCJSON(itemData.data); - qrJoined.Add(ItemRef(itemData.id, itemimpl.Value(), itemData.proc, itemData.nsid, true)); + qrJoined.AddItemRef(itemData.id, itemimpl.Value(), itemData.proc, itemData.nsid, true); if (!storage) { rqr.NsJoinRes()->data.joinedRawData.emplace_back(std::move(itemimpl)); } else { @@ -648,7 +685,7 @@ joins::ItemIterator QueryResults::Iterator::GetJoined(std::vector* } } - return joins::ItemIterator(&(qr_->remote_[0].NsJoinRes()->data.jr), rit.itemParams_.id); + 
return joins::ItemIterator(&(rqr.NsJoinRes()->data.jr), rit.itemParams_.id); } // Distributed queries can not have joins return reindexer::joins::ItemIterator::CreateEmpty(); @@ -959,6 +996,8 @@ class QueryResults::Comparator { ItemRef liref, riref; int lShardId, rShardId; if (lhs < 0) { + assertrx_dbg(qr_.local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) const auto& lqr = *qr_.local_; liref = lqr.it.GetItemRef(); ltm = lqr.qr.getTagsMatcher(0); @@ -966,13 +1005,15 @@ class QueryResults::Comparator { lShardId = lqr.shardID; } else { assertrx(static_cast(lhs) < qr_.remote_.size()); - auto& rqr = qr_.remote_[lhs]; + auto& rqr = *qr_.remote_[lhs]; liref = rqr.ItemRefData(qr_.curQrId_).data.ref; ltm = rqr.qr.GetTagsMatcher(0); lpt = rqr.qr.GetPayloadType(0); lShardId = rqr.shardID; } if (rhs < 0) { + assertrx_dbg(qr_.local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) const auto& lqr = *qr_.local_; riref = lqr.it.GetItemRef(); rtm = lqr.qr.getTagsMatcher(0); @@ -980,7 +1021,7 @@ class QueryResults::Comparator { rShardId = lqr.shardID; } else { assertrx(static_cast(rhs) < qr_.remote_.size()); - auto& rqr = qr_.remote_[rhs]; + auto& rqr = *qr_.remote_[rhs]; riref = rqr.ItemRefData(qr_.curQrId_).data.ref; rtm = rqr.qr.GetTagsMatcher(0); rpt = rqr.qr.GetPayloadType(0); @@ -1015,6 +1056,8 @@ void QueryResults::SetOrdering(const Query& q, const NamespaceImpl& ns, const Rd void QueryResults::beginImpl() const { if (type_ == Type::Local) { + assertrx_dbg(local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) begin_.it = Iterator{this, 0, {local_->qr.begin() + std::min(offset, count())}}; } else { begin_.it = Iterator{this, 0, std::nullopt}; @@ -1032,6 +1075,7 @@ QueryResults::Iterator& QueryResults::Iterator::operator++() { *this = qr_->end(); return *this; case Type::Local: + assertrx_dbg(localIt_); // NOLINTNEXTLINE(bugprone-unchecked-optional-access) ++(*localIt_); return *this; @@ -1049,14 +1093,18 @@ QueryResults::Iterator& 
QueryResults::Iterator::operator++() { auto* qr = const_cast(qr_); if (!qr_->orderedQrs_ || qr_->type_ == Type::SingleRemote) { if (qr->curQrId_ < 0) { + assertrx_dbg(qr->local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) ++qr->local_->it; ++qr->lastSeenIdx_; ++idx_; + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) if (qr->local_->it == qr->local_->qr.end()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) qr->curQrId_ = qr->findFirstQrWithItems(qr->local_->shardID); } } else if (size_t(qr->curQrId_) < qr_->remote_.size()) { - auto& remoteQrp = qr->remote_[size_t(qr_->curQrId_)]; + auto& remoteQrp = *qr->remote_[size_t(qr_->curQrId_)]; ++remoteQrp.it; ++qr->lastSeenIdx_; ++idx_; @@ -1071,6 +1119,8 @@ QueryResults::Iterator& QueryResults::Iterator::operator++() { assertrx(*qr->orderedQrs_->begin() == qrId); auto oNode = qr->orderedQrs_->extract(qr->orderedQrs_->begin()); if (qrId < 0) { + assertrx_dbg(qr->local_); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) ++qr->local_->it; if (qr->local_->it != qr->local_->qr.end()) { oNode.value() = -1; @@ -1078,8 +1128,9 @@ QueryResults::Iterator& QueryResults::Iterator::operator++() { } } else { assertrx(static_cast(qrId) < qr_->remote_.size()); - ++qr->remote_[qrId].it; - if (qr->remote_[qrId].it != qr->remote_[qrId].qr.end()) { + auto& remoteQrp = *qr->remote_[qrId]; + ++remoteQrp.it; + if (remoteQrp.it != remoteQrp.qr.end()) { oNode.value() = qrId; qr->orderedQrs_->insert(std::move(oNode)); } @@ -1117,6 +1168,7 @@ ItemRef QueryResults::Iterator::GetItemRef(ProxiedRefsStorage* storage) { case Type::None: return ItemRef(); case Type::Local: + assertrx_dbg(localIt_); // NOLINTNEXTLINE(bugprone-unchecked-optional-access) return localIt_->GetItemRef(); case Type::SingleRemote: @@ -1128,9 +1180,9 @@ ItemRef QueryResults::Iterator::GetItemRef(ProxiedRefsStorage* storage) { overloaded{[](QrMetaData* qr) noexcept { return qr->it.GetItemRef(); }, [&](QrMetaData* qr) { if 
(!qr->CheckIfItemRefStorageHasSameIdx(idx_) || storage) { + auto& remoteQr = *qr_->remote_[size_t(qr_->curQrId_)]; ItemImpl itemimpl(qr_->GetPayloadType(0), qr_->GetTagsMatcher(0)); - const bool convertViaJSON = - !qr_->remote_[size_t(qr_->curQrId_)].hasCompatibleTm || !qr_->remote_[size_t(qr_->curQrId_)].qr.IsCJSON(); + const bool convertViaJSON = !remoteQr.hasCompatibleTm || !remoteQr.qr.IsCJSON(); Error err = fillItemImpl(qr->it, itemimpl, convertViaJSON); if (!err.ok()) { throw err; @@ -1167,8 +1219,9 @@ int QueryResults::findFirstQrWithItems(int minShardId) { orderedQrs_->emplace(-1); } for (int i = 0, size = remote_.size(); i < size; ++i) { - if (remote_[i].qr.Count()) { - assertrx(remote_[i].it == remote_[i].qr.begin()); + auto& remote = *remote_[i]; + if (remote.qr.Count()) { + assertrx(remote.it == remote.qr.begin()); orderedQrs_->emplace(i); } } @@ -1185,9 +1238,10 @@ int QueryResults::findFirstQrWithItems(int minShardId) { foundShardId = local_->shardID; } for (int i = 0, size = remote_.size(); i < size; ++i) { - if (remote_[i].qr.Count() && remote_[i].shardID < foundShardId && remote_[i].shardID > minShardId) { + auto& remote = *remote_[i]; + if (remote.qr.Count() && remote.shardID < foundShardId && remote.shardID > minShardId) { foundPos = i; - foundShardId = remote_[i].shardID; + foundShardId = remote.shardID; } } return foundPos; diff --git a/cpp_src/core/queryresults/queryresults.h b/cpp_src/core/queryresults/queryresults.h index db4a95bb8..6adbf33e9 100644 --- a/cpp_src/core/queryresults/queryresults.h +++ b/cpp_src/core/queryresults/queryresults.h @@ -51,9 +51,27 @@ class QueryResults { public: QrMetaData(QrT&& _qr = QrT()) : qr(std::move(_qr)), it(qr.begin()) {} QrMetaData(const QrMetaData&) = delete; - QrMetaData(QrMetaData&&) = delete; + QrMetaData(QrMetaData&& o) noexcept + : qr(std::move(o.qr)), + it(QrT::Iterator::SwitchQueryResultsPtrUnsafe(std::move(o.it), qr)), + hasCompatibleTm(o.hasCompatibleTm), + shardID(o.shardID), + 
itemRefData_(std::move(o.itemRefData_)), + nsJoinRes_(std::move(o.nsJoinRes_)) {} QrMetaData& operator=(const QrMetaData&) = delete; - QrMetaData& operator=(QrMetaData&&) = delete; + QrMetaData& operator=(QrMetaData&& o) noexcept { + if (this != &o) { + qr = std::move(o.qr); + // SwitchQueryResultsPtrUnsafe is not implemented for client query results - iterator contains to many different pointer + // and it is unsafe to move it + it = QrT::Iterator::SwitchQueryResultsPtrUnsafe(std::move(o.it), qr); + hasCompatibleTm = o.hasCompatibleTm; + shardID = o.shardID; + itemRefData_ = std::move(o.itemRefData_); + nsJoinRes_ = std::move(o.nsJoinRes_); + } + return *this; + } QrT qr; typename QrT::Iterator it; @@ -78,7 +96,7 @@ class QueryResults { cnt += local_->qr.Count(); } for (const auto& qrp : remote_) { - cnt += qrp.qr.Count(); + cnt += qrp->qr.Count(); } return cnt; } @@ -106,8 +124,8 @@ class QueryResults { cnt += local_->qr.TotalCount(); } for (const auto& qrp : remote_) { - if (qrp.qr.TotalCount() > 0) { - cnt += size_t(qrp.qr.TotalCount()); + if (qrp->qr.TotalCount() > 0) { + cnt += size_t(qrp->qr.TotalCount()); } } return cnt; @@ -136,7 +154,7 @@ class QueryResults { case Type::Local: return local_->qr.GetExplainResults(); case Type::SingleRemote: - return remote_[0].qr.GetExplainResults(); + return remote_[0]->qr.GetExplainResults(); case Type::Mixed: if (local_->qr.explainResults.size()) { throw Error(errForbidden, "Explain is not supported for distribute queries"); @@ -144,7 +162,7 @@ class QueryResults { [[fallthrough]]; case Type::MultipleRemote: for (auto& qrp : remote_) { - if (qrp.qr.GetExplainResults().size()) { + if (qrp->qr.GetExplainResults().size()) { throw Error(errForbidden, "Explain is not supported for distribute queries"); } } @@ -166,7 +184,7 @@ class QueryResults { return local_->qr.getMergedNSCount(); } case Type::SingleRemote: { - return remote_[0].qr.GetMergedNSCount(); + return remote_[0]->qr.GetMergedNSCount(); } case 
Type::MultipleRemote: case Type::Mixed: @@ -195,7 +213,7 @@ class QueryResults { return ret; } case Type::SingleRemote: { - auto& remote = remote_[0]; + auto& remote = *remote_[0]; auto& remoteTags = remote.qr.GetIncarnationTags(); if (remoteTags.empty()) { return ret; @@ -220,7 +238,7 @@ class QueryResults { [[fallthrough]]; case Type::MultipleRemote: for (auto& r : remote_) { - auto& remoteTags = r.qr.GetIncarnationTags(); + auto& remoteTags = r->qr.GetIncarnationTags(); if (remoteTags.empty()) { continue; } @@ -228,7 +246,7 @@ class QueryResults { throw Error(errLogic, "Unexpected shards count in the remote query results"); } auto& tags = ret.emplace_back(remoteTags[0]); - tags.shardId = r.shardID; + tags.shardId = r->shardID; } return ret; } @@ -269,23 +287,24 @@ class QueryResults { bool IsWALQuery() const noexcept { return qData_.has_value() && qData_->isWalQuery; } uint32_t GetJoinedField(int parentNsId) const noexcept; bool IsRawProxiedBufferAvailable(int flags) const noexcept { - if (type_ != Type::SingleRemote || !remote_[0].qr.IsInLazyMode()) { + if (type_ != Type::SingleRemote || !remote_[0]->qr.IsInLazyMode()) { return false; } + auto& remote = *remote_[0]; const auto qrFlags = - remote_[0].qr.GetFlags() ? (remote_[0].qr.GetFlags() & ~kResultsWithPayloadTypes & ~kResultsWithShardId) : kResultsCJson; + remote.qr.GetFlags() ? (remote.qr.GetFlags() & ~kResultsWithPayloadTypes & ~kResultsWithShardId) : kResultsCJson; const auto qrFormat = qrFlags & kResultsFormatMask; const auto reqFlags = flags ? 
(flags & ~kResultsWithPayloadTypes & ~kResultsWithShardId) : kResultsCJson; const auto reqFormat = reqFlags & kResultsFormatMask; return qrFormat == reqFormat && (qrFlags & reqFlags) == reqFlags; } - bool GetRawProxiedBuffer(client::ParsedQrRawBuffer& out) { return remote_[0].qr.GetRawBuffer(out); } + bool GetRawProxiedBuffer(client::ParsedQrRawBuffer& out) { return remote_[0]->qr.GetRawBuffer(out); } void FetchRawBuffer(int flgs, int off, int lim) { if (!IsRawProxiedBufferAvailable(flgs)) { throw Error(errLogic, "Raw buffer is not available"); } - remote_[0].qr.FetchNextResults(flgs, off, lim); + remote_[0]->qr.FetchNextResults(flgs, off, lim); } void SetFlags(int flags) { if (GetType() != Type::None) { @@ -339,7 +358,7 @@ class QueryResults { if (qr_->curQrId_ < 0) { return qr_->local_->shardID; } - return qr_->remote_[size_t(qr_->curQrId_)].shardID; + return qr_->remote_[size_t(qr_->curQrId_)]->shardID; } bool IsRaw() const { struct { @@ -403,7 +422,7 @@ class QueryResults { if (qr_->curQrId_ < 0) { return Error(); } - return qr_->remote_[qr_->curQrId_].it.Status(); + return qr_->remote_[qr_->curQrId_]->it.Status(); } bool operator!=(const Iterator& other) const noexcept { return !(*this == other); } bool operator==(const Iterator& other) const noexcept { @@ -443,7 +462,7 @@ class QueryResults { return &(*qr->local_); } if (size_t(qr_->curQrId_) < qr->remote_.size()) { - return &qr->remote_[size_t(qr_->curQrId_)]; + return qr->remote_[size_t(qr_->curQrId_)].get(); } throw Error(errNotValid, "Iterator is not valid"); } @@ -466,7 +485,7 @@ class QueryResults { return qr->local_->it; } if (size_t(qr_->curQrId_) < qr->remote_.size()) { - return qr->remote_[size_t(qr_->curQrId_)].it; + return qr->remote_[size_t(qr_->curQrId_)]->it; } throw Error(errNotValid, "Iterator is not valid"); } @@ -523,8 +542,9 @@ class QueryResults { int64_t shardingConfigVersion_ = ShardingSourceId::NotSet; std::unique_ptr mergedData_; // Merged data of distributed query results - 
std::unique_ptr> local_; - std::deque> remote_; + std::optional> local_; + // We could use std::deque to make QrMetaData non-movable, but deque's default constructor performs allocation in GCC's implementation + std::vector>> remote_; int64_t lastSeenIdx_ = 0; int curQrId_ = -1; Type type_ = Type::None; diff --git a/cpp_src/core/reindexer_impl/reindexerimpl.cc b/cpp_src/core/reindexer_impl/reindexerimpl.cc index a9a286a38..e63697219 100644 --- a/cpp_src/core/reindexer_impl/reindexerimpl.cc +++ b/cpp_src/core/reindexer_impl/reindexerimpl.cc @@ -10,10 +10,8 @@ #include "core/defnsconfigs.h" #include "core/iclientsstats.h" #include "core/index/index.h" -#include "core/itemimpl.h" #include "core/nsselecter/querypreprocessor.h" #include "core/query/sql/sqlsuggester.h" -#include "core/queryresults/joinresults.h" #include "core/selectfunc/selectfunc.h" #include "debug/crashqueryreporter.h" #include "rx_selector.h" @@ -219,11 +217,11 @@ Error ReindexerImpl::enableStorage(const std::string& storagePath) { watcher.SetDirectory(storagePath_); } auto err = readClusterConfigFile(); - if (res.ok()) { + if (!err.ok() && res.ok()) { res = Error(err.code(), "Failed to read cluster config file: '%s'", err.what()); } err = readShardingConfigFile(); - if (res.ok()) { + if (!err.ok() && res.ok()) { res = Error(err.code(), "Failed to read sharding config file: '%s'", err.what()); } @@ -729,7 +727,7 @@ Error ReindexerImpl::renameNamespace(std::string_view srcNsName, const std::stri const RdxContext& rdxCtx) { Namespace::Ptr dstNs, srcNs; try { - if (std::string_view(dstNsName) == srcNsName.data()) { + if (std::string_view(dstNsName) == srcNsName) { return {}; } if (isSystemNamespaceNameStrict(srcNsName)) { @@ -1097,7 +1095,8 @@ Error ReindexerImpl::Select(const Query& q, LocalQueryResults& result, const Rdx RxSelector::NsLocker locks(rdxCtx); auto mainNsWrp = getNamespace(q.NsName(), rdxCtx); - auto mainNs = q.IsWALQuery() ? 
mainNsWrp->awaitMainNs(rdxCtx) : mainNsWrp->getMainNs(); + const bool isWalQuery = q.IsWALQuery(); + auto mainNs = isWalQuery ? mainNsWrp->awaitMainNs(rdxCtx) : mainNsWrp->getMainNs(); const auto queriesPerfStatsEnabled = configProvider_.QueriesPerfStatsEnabled(); const auto queriesThresholdUS = configProvider_.QueriesThresholdUS(); @@ -1115,12 +1114,11 @@ Error ReindexerImpl::Select(const Query& q, LocalQueryResults& result, const Rdx auto hitter = queriesPerfStatsEnabled ? [&sql, &tracker](bool lockHit, std::chrono::microseconds time) { - if (lockHit){ + if (lockHit) { tracker.LockHit(sql, time); -} - else{ + } else { tracker.Hit(sql, time); -} + } } : std::function{}; const bool isSystemNsRequest = isSystemNamespaceNameFast(q.NsName()); @@ -1138,12 +1136,13 @@ Error ReindexerImpl::Select(const Query& q, LocalQueryResults& result, const Rdx mainNs->updateSelectTime(); locks.Add(std::move(mainNs)); struct { + bool isWalQuery; RxSelector::NsLocker& locks; const RdxContext& ctx; - } refs{locks, rdxCtx}; + } refs{isWalQuery, locks, rdxCtx}; q.WalkNested(false, true, true, [this, &refs](const Query& q) { auto nsWrp = getNamespace(q.NsName(), refs.ctx); - auto ns = q.IsWALQuery() ? nsWrp->awaitMainNs(refs.ctx) : nsWrp->getMainNs(); + auto ns = refs.isWalQuery ? 
nsWrp->awaitMainNs(refs.ctx) : nsWrp->getMainNs(); ns->updateSelectTime(); refs.locks.Add(std::move(ns)); }); @@ -1162,13 +1161,13 @@ Error ReindexerImpl::Select(const Query& q, LocalQueryResults& result, const Rdx RxSelector::DoSelect(q, result, locks, func, rdxCtx, statCalculator); func.Process(result); } catch (const Error& err) { - if (rdxCtx.Compl()) { - rdxCtx.Compl()(err); + if (auto cmpl = rdxCtx.Compl(); cmpl) { + cmpl(err); } return err; } - if (rdxCtx.Compl()) { - rdxCtx.Compl()(errOK); + if (auto cmpl = rdxCtx.Compl(); cmpl) { + cmpl(Error()); } return Error(); } @@ -2224,8 +2223,8 @@ Error ReindexerImpl::ApplySnapshotChunk(std::string_view nsName, const SnapshotC } bool ReindexerImpl::isSystemNamespaceNameStrict(std::string_view name) noexcept { - return std::find_if(kSystemNsDefs.begin(), kSystemNsDefs.end(), - [name](const NamespaceDef& nsDef) { return iequals(nsDef.name, name); }) != kSystemNsDefs.end(); + return std::find_if(std::cbegin(kSystemNsDefs), std::cend(kSystemNsDefs), + [name](const NamespaceDef& nsDef) { return iequals(nsDef.name, name); }) != std::cend(kSystemNsDefs); } Error ReindexerImpl::SuggestLeader(const cluster::NodeData& suggestion, cluster::NodeData& response) { @@ -2274,11 +2273,6 @@ Error ReindexerImpl::getLeaderDsn(DSN& dsn, unsigned short serverId, const clust return Error(errLogic, "Leader serverId is missing in the config."); } -template -static auto makeUpdateRecord(Tuple&& t, std::index_sequence) { - return UpdateRecord{std::get(std::forward(t))...}; -} - template Error ReindexerImpl::shardingConfigReplAction(const RdxContext& ctx, PreReplFunc func, Args&&... args) noexcept { try { @@ -2288,7 +2282,8 @@ Error ReindexerImpl::shardingConfigReplAction(const RdxContext& ctx, PreReplFunc } return observers_.SendUpdate( - makeUpdateRecord(func(std::forward(args)...), std::make_index_sequence>{}), + std::apply([](auto&&... 
args) { return UpdateRecord{std::forward(args)...}; }, + func(std::forward(args)...)), [&wlck] { assertrx(wlck.isClusterLck()); wlck.unlock(); diff --git a/cpp_src/core/selectfunc/functions/debugrank.cc b/cpp_src/core/selectfunc/functions/debugrank.cc index cee1d5b74..68ad444d7 100644 --- a/cpp_src/core/selectfunc/functions/debugrank.cc +++ b/cpp_src/core/selectfunc/functions/debugrank.cc @@ -40,7 +40,7 @@ bool DebugRank::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruc throw Error(errLogic, "Unable to apply debug_rank function to the non-string field '%s'", func.field); } - const std::string* data = p_string(kr[0]).getCxxstr(); + const std::string_view data = std::string_view(p_string(kr[0])); const auto pva = dataFtCtx.area[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) { @@ -51,7 +51,7 @@ bool DebugRank::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruc std::string resultString; auto splitterTask = ftctx->GetData()->splitter->CreateTask(); - splitterTask->SetText(*data); + splitterTask->SetText(data); static const std::string_view startString = ""; static const std::string_view endString = ""; @@ -63,7 +63,7 @@ bool DebugRank::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruc bool next = false; int endStringCount = 0; std::pair pos = splitterTask->Convert(areaVector[id].start, areaVector[id].end); - resultString += std::string_view(data->c_str() + beforeStr, pos.first - beforeStr); + resultString += std::string_view(data.data() + beforeStr, pos.first - beforeStr); do { next = false; switch (areaVector[id].phraseMode) { @@ -86,13 +86,13 @@ bool DebugRank::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruc next = true; } } while (next); - resultString += std::string_view(data->c_str() + pos.first, pos.second - pos.first); + resultString += std::string_view(data.data() + pos.first, pos.second - pos.first); beforeStr = pos.second; for (int i = 0; i < endStringCount; i++) { resultString += 
endString; } } - resultString += std::string_view(data->c_str() + beforeStr, data->size() - beforeStr); + resultString += std::string_view(data.data() + beforeStr, data.size() - beforeStr); stringsHolder.emplace_back(make_key_string(std::move(resultString))); res.Value().Clone(); @@ -101,4 +101,4 @@ bool DebugRank::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruc return true; } -} // namespace reindexer \ No newline at end of file +} // namespace reindexer diff --git a/cpp_src/core/selectfunc/functions/highlight.cc b/cpp_src/core/selectfunc/functions/highlight.cc index 08921d3b3..bbc4ac4f6 100644 --- a/cpp_src/core/selectfunc/functions/highlight.cc +++ b/cpp_src/core/selectfunc/functions/highlight.cc @@ -38,7 +38,7 @@ bool Highlight::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStru throw Error(errLogic, "Unable to apply highlight function to the non-string field '%s'", func.field); } - const std::string* data = p_string(kr[0]).getCxxstr(); + const std::string_view data = std::string_view(p_string(kr[0])); auto pva = dataFtCtx.area[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) { @@ -47,11 +47,11 @@ bool Highlight::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStru auto& va = *pva; std::string result_string; - result_string.reserve(data->size() + va.Size() * (func.funcArgs[0].size() + func.funcArgs[1].size())); - result_string = *data; + result_string.reserve(data.size() + va.Size() * (func.funcArgs[0].size() + func.funcArgs[1].size())); + result_string.append(data); auto splitterTask = ftctx->GetData()->splitter->CreateTask(); - splitterTask->SetText(*data); + splitterTask->SetText(data); int offset = 0; for (auto area : va.GetData()) { diff --git a/cpp_src/core/selectfunc/functions/snippet.cc b/cpp_src/core/selectfunc/functions/snippet.cc index e92856dc6..8c35387b3 100644 --- a/cpp_src/core/selectfunc/functions/snippet.cc +++ b/cpp_src/core/selectfunc/functions/snippet.cc @@ -100,7 +100,7 @@ void 
Snippet::init(const SelectFuncStruct& func) { isInit_ = true; } -void Snippet::addSnippet(std::string& resultString, const std::string& data, const Area& snippetAreaPrev, +void Snippet::addSnippet(std::string& resultString, std::string_view data, const Area& snippetAreaPrev, const Area& snippetAreaPrevChar) const { resultString.append(preDelim_); @@ -181,6 +181,7 @@ A Snippet::RecalcZoneHelper::RecalcZoneToOffset(const Area& area) { wordCount_++; } + // NOLINTNEXTLINE(bugprone-suspicious-stringview-data-usage) auto b = calcUtf8BeforeDelims(data_.data(), outAreas.zoneArea.start, before_, leftBound_); auto a = calcUtf8AfterDelims({data_.data() + outAreas.zoneArea.end, data_.size() - outAreas.zoneArea.end}, after_, rightBound_); outAreas.snippetArea.start = outAreas.zoneArea.start - b.first; @@ -192,7 +193,7 @@ A Snippet::RecalcZoneHelper::RecalcZoneToOffset(const Area& area) { return outAreas; } -void Snippet::buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, +void Snippet::buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, std::string_view data, std::string& resultString) { // resultString =preDelim_+with_area_str+data_str_before+marker_before+zone_str+marker_after+data_strAfter+postDelim_ Area snippetAreaPrev; @@ -235,7 +236,7 @@ void Snippet::buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField resultString.append(postDelim_); } -void Snippet::buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, +void Snippet::buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, std::string_view data, std::string& resultString) { // resultString =preDelim_+with_area_str+data_str_before+marker_before+zone_str+marker_after+data_strAfter+postDelim_ Area snippetAreaPrev; @@ -296,7 +297,7 @@ bool Snippet::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruct& throw Error(errLogic, "Unable to apply snippet 
function to the non-string field '%s'", func.field); } - const std::string* data = p_string(kr[0]).getCxxstr(); + const std::string_view data = std::string_view(p_string(kr[0])); auto pva = dataFtCtx.area[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) { @@ -304,14 +305,14 @@ bool Snippet::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruct& } std::string resultString; - resultString.reserve(data->size()); + resultString.reserve(data.size()); - RecalcZoneHelper recalcZoneHelper(*data, ftctx->GetData()->splitter, after_, before_, leftBound_, rightBound_); + RecalcZoneHelper recalcZoneHelper(data, ftctx->GetData()->splitter, after_, before_, leftBound_, rightBound_); if (needAreaStr_) { - buildResultWithPrefix(recalcZoneHelper, *pva, *data, resultString); + buildResultWithPrefix(recalcZoneHelper, *pva, data, resultString); } else { - buildResult(recalcZoneHelper, *pva, *data, resultString); + buildResult(recalcZoneHelper, *pva, data, resultString); } stringsHolder.emplace_back(make_key_string(std::move(resultString))); diff --git a/cpp_src/core/selectfunc/functions/snippet.h b/cpp_src/core/selectfunc/functions/snippet.h index 58a93152a..448bb1d02 100644 --- a/cpp_src/core/selectfunc/functions/snippet.h +++ b/cpp_src/core/selectfunc/functions/snippet.h @@ -14,7 +14,7 @@ class Snippet { private: void init(const SelectFuncStruct& func); - void addSnippet(std::string& resultString, const std::string& data, const Area& snippetAreaPrev, const Area& snippetAreaPrevChar) const; + void addSnippet(std::string& resultString, std::string_view data, const Area& snippetAreaPrev, const Area& snippetAreaPrevChar) const; class RecalcZoneHelper { public: @@ -41,9 +41,8 @@ class Snippet { std::string_view leftBound_, rightBound_; }; - void buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, std::string& resultString); - - void buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, 
const std::string& data, + void buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, std::string_view data, std::string& resultString); + void buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, std::string_view data, std::string& resultString); bool isInit_ = false; diff --git a/cpp_src/core/selectkeyresult.h b/cpp_src/core/selectkeyresult.h index f45b67fc9..7283976c2 100644 --- a/cpp_src/core/selectkeyresult.h +++ b/cpp_src/core/selectkeyresult.h @@ -177,7 +177,11 @@ class SelectKeyResult : public h_vector { size_t cnt = 0; for (const SingleSelectKeyResult& r : *this) { if (r.indexForwardIter_) { - cnt += r.indexForwardIter_->GetMaxIterations(limitIters); + auto iters = r.indexForwardIter_->GetMaxIterations(limitIters); + if (iters == std::numeric_limits::max()) { + return limitIters; + } + cnt += iters; } else if (r.isRange_) { cnt += std::abs(r.rEnd_ - r.rBegin_); } else if (r.useBtree_) { @@ -186,7 +190,7 @@ class SelectKeyResult : public h_vector { cnt += r.ids_.size(); } if (cnt > limitIters) { - break; + return limitIters; } } return cnt; diff --git a/cpp_src/core/sorting/sortexpression.cc b/cpp_src/core/sorting/sortexpression.cc index 1003f0cfa..cb0383a69 100644 --- a/cpp_src/core/sorting/sortexpression.cc +++ b/cpp_src/core/sorting/sortexpression.cc @@ -1,5 +1,4 @@ #include "sortexpression.h" -#include #include "core/namespace/namespaceimpl.h" #include "core/nsselecter/joinedselector.h" #include "core/nsselecter/joinedselectormock.h" @@ -26,6 +25,9 @@ static reindexer::VariantArray getFieldValues(reindexer::ConstPayload pv, reinde if (index == IndexValueType::SetByJsonPath) { pv.GetByJsonPath(column, tagsMatcher, values, reindexer::KeyValueType::Undefined{}); } else { + if (index >= pv.NumFields()) { + throw reindexer::Error(errQueryExec, "Composite fields in sort expression are not supported"); + } pv.Get(index, values); } return values; @@ -245,7 +247,8 @@ static ParseIndexNameResult 
parseIndexName(std::string_view& expr, const std: ++pos; } if (pos != end && *pos == '.') { - std::string_view namespaceName = {expr.data(), static_cast(pos - expr.data())}; + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) + std::string_view namespaceName{expr.data(), static_cast(pos - expr.data())}; // Check for quotes in join expression to skip them joinedFieldInQuotes = namespaceName.at(0) == '"'; @@ -270,6 +273,7 @@ static ParseIndexNameResult parseIndexName(std::string_view& expr, const std: while (pos != end && kIndexNameSyms.test(*pos)) { ++pos; } + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) std::string_view name{expr.data(), static_cast(pos - expr.data())}; if (name.empty()) { throwParseError(fullExpr, pos, "Expected index or function name."); @@ -301,22 +305,26 @@ static Point parsePoint(std::string_view& expr, std::string_view funcName, std:: StringToDoubleConverter::ALLOW_SPACES_AFTER_SIGN, 0.0, 0.0, nullptr, nullptr}; if (funcName != "st_geomfromtext") { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Unsupported function inside ST_Distance() : '" + std::string(funcName) + "'."); } expr.remove_prefix(1); skipSpaces(); if (expr.empty() || (expr[0] != '\'' && expr[0] != '"')) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected \" or '."); } const char openQuote = expr[0]; expr.remove_prefix(1); skipSpaces(); if (!checkIfStartsWith("point"sv, expr)) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected 'point'."); } expr.remove_prefix(5); skipSpaces(); if (expr.empty() || expr[0] != '(') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected '('."); } expr.remove_prefix(1); @@ -324,6 +332,7 @@ static Point parsePoint(std::string_view& expr, std::string_view funcName, std:: int 
countOfCharsParsedAsDouble = 0; const double x = converter.StringToDouble(expr.data(), expr.size(), &countOfCharsParsedAsDouble); if (countOfCharsParsedAsDouble == 0) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected number."); } expr.remove_prefix(countOfCharsParsedAsDouble); @@ -331,21 +340,25 @@ static Point parsePoint(std::string_view& expr, std::string_view funcName, std:: countOfCharsParsedAsDouble = 0; const double y = converter.StringToDouble(expr.data(), expr.size(), &countOfCharsParsedAsDouble); if (countOfCharsParsedAsDouble == 0) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected number."); } expr.remove_prefix(countOfCharsParsedAsDouble); skipSpaces(); if (expr.empty() || expr[0] != ')') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ')'."); } expr.remove_prefix(1); skipSpaces(); if (expr.empty() || expr[0] != openQuote) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), std::string("Expected ") + openQuote + '.'); } expr.remove_prefix(1); skipSpaces(); if (expr.empty() || expr[0] != ')') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ')'."); } expr.remove_prefix(1); @@ -360,6 +373,7 @@ void SortExpression::parseDistance(std::string_view& expr, const std::vector& skipSpaces(); if (parsedIndexName1.joinedSelectorIt != joinedSelectors.cend()) { if (expr.empty() || expr[0] != ',') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ','."); } expr.remove_prefix(1); @@ -369,6 +383,7 @@ void SortExpression::parseDistance(std::string_view& expr, const std::vector& if (parsedIndexName2.joinedSelectorIt != joinedSelectors.cend()) { if (parsedIndexName1.joinedSelectorIt == 
parsedIndexName2.joinedSelectorIt) { if (toLower(parsedIndexName1.name) == toLower(parsedIndexName2.name)) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Distance between two same indexes"); } Append({op, negative}, @@ -393,6 +408,7 @@ void SortExpression::parseDistance(std::string_view& expr, const std::vector& const auto point = parsePoint(expr, toLower(parsedIndexName1.name), fullExpr, skipSpaces); skipSpaces(); if (expr.empty() || expr[0] != ',') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ','."); } expr.remove_prefix(1); @@ -400,6 +416,7 @@ void SortExpression::parseDistance(std::string_view& expr, const std::vector& const auto parsedIndexName2 = parseIndexName(expr, joinedSelectors, fullExpr); skipSpaces(); if (!expr.empty() && expr[0] == '(') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Allowed only one function inside ST_Geometry"); } if (parsedIndexName2.joinedSelectorIt != joinedSelectors.cend()) { @@ -411,6 +428,7 @@ void SortExpression::parseDistance(std::string_view& expr, const std::vector& } } else { if (expr.empty() || expr[0] != ',') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ','."); } expr.remove_prefix(1); @@ -428,6 +446,7 @@ void SortExpression::parseDistance(std::string_view& expr, const std::vector& Append({op, negative}, DistanceFromPoint{std::move(parsedIndexName1.name), point}); } else { if (toLower(parsedIndexName1.name) == toLower(parsedIndexName2.name)) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Distance between two same indexes"); } Append({op, negative}, DistanceBetweenIndexes{std::move(parsedIndexName1.name), std::move(parsedIndexName2.name)}); @@ -469,6 +488,7 @@ std::string_view 
SortExpression::parse(std::string_view expr, bool* containIndex expr.remove_prefix(1); skipSpaces(); if (expr.empty()) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "The expression unexpected ends after unary operator."); } } @@ -477,6 +497,7 @@ std::string_view SortExpression::parse(std::string_view expr, bool* containIndex OpenBracket({op, negative}); expr = parse(expr, containIndexOrFunction, fullExpr, joinedSelectors); if (expr.empty() || expr[0] != ')') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ')'."); } expr.remove_prefix(1); @@ -514,9 +535,11 @@ std::string_view SortExpression::parse(std::string_view expr, bool* containIndex } else if (funcName == "st_distance") { parseDistance(expr, joinedSelectors, fullExpr, op, negative, skipSpaces); } else { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Unsupported function name : '" + funcName + "'."); } if (expr.empty() || expr[0] != ')') { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected ')'."); } expr.remove_prefix(1); @@ -557,6 +580,7 @@ std::string_view SortExpression::parse(std::string_view expr, bool* containIndex } break; default: + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), std::string("Expected ')', '+', '-', '*' of '/', but obtained '") + expr[0] + "'."); } @@ -566,6 +590,7 @@ std::string_view SortExpression::parse(std::string_view expr, bool* containIndex skipSpaces(); } if (expectValue) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(fullExpr, expr.data(), "Expected value."); } if (needCloseBracket) { @@ -580,9 +605,11 @@ SortExpression SortExpression::Parse(std::string_view expression, const std::vec bool containIndexOrFunction = false; const auto expr = 
result.parse(expression, &containIndexOrFunction, expression, joinedSelector); if (!expr.empty()) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(expression, expr.data(), ""); } if (!containIndexOrFunction) { + // NOLINTNEXTLINE (bugprone-suspicious-stringview-data-usage) throwParseError(expression, expr.data(), "Sort expression does not depend from namespace data"); } return result; diff --git a/cpp_src/debug/terminate_handler.cpp b/cpp_src/debug/terminate_handler.cpp index e69791d04..f76652f1c 100644 --- a/cpp_src/debug/terminate_handler.cpp +++ b/cpp_src/debug/terminate_handler.cpp @@ -37,6 +37,8 @@ static void terminate_handler() { } const auto writer = backtrace_get_writer(); writer(sout.str()); + std::cerr << "Reindexer was terminated by terminate()-call. STDERR info:\n"; + std::cerr << sout.str() << std::endl; sout.str(std::string()); sout.clear(); print_crash_query(sout); @@ -45,6 +47,7 @@ static void terminate_handler() { sout.clear(); print_backtrace(sout, nullptr, -1); writer(sout.str()); + std::cerr << sout.str() << std::endl; exit(-1); } diff --git a/cpp_src/estl/elist.h b/cpp_src/estl/elist.h index b14dc4832..bb49c3879 100644 --- a/cpp_src/estl/elist.h +++ b/cpp_src/estl/elist.h @@ -5,7 +5,7 @@ namespace reindexer { // Wrapper for std::list, which holds list's size. 
-// For some older libstdc++ implemetation, std::list::size() has O(N) complection +// For some older libstdc++ implementation, std::list::size() has O(N) complection // (for example, Centos7 with devtoolsets is affected to this issue) template class elist { diff --git a/cpp_src/estl/h_vector.h b/cpp_src/estl/h_vector.h index f727fc5c1..4b950503b 100644 --- a/cpp_src/estl/h_vector.h +++ b/cpp_src/estl/h_vector.h @@ -17,14 +17,6 @@ template class h_vector : public std::vector {}; #else -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 4 -template -using is_trivially_default_constructible = std::has_trivial_default_constructor; -#else -template -using is_trivially_default_constructible = std::is_trivially_default_constructible; -#endif - template class h_vector { static_assert(holdSize > 0); @@ -228,7 +220,7 @@ class h_vector { void resize(size_type sz) { grow(sz); - if constexpr (!reindexer::is_trivially_default_constructible::value) { + if constexpr (!std::is_trivially_default_constructible::value) { const pointer p = ptr(); const size_type old_sz = size_; for (size_type i = old_sz; i < sz; ++i) { @@ -294,19 +286,22 @@ class h_vector { } } void push_back(const T& v) { - grow(size_ + 1); - new (ptr() + size_) T(v); + const auto size = size_; + grow(size + 1); + new (ptr() + size) T(v); ++size_; } void push_back(T&& v) { - grow(size_ + 1); - new (ptr() + size_) T(std::move(v)); + const auto size = size_; + grow(size + 1); + new (ptr() + size) T(std::move(v)); ++size_; } template reference emplace_back(Args&&... args) { - grow(size_ + 1); - auto p = ptr() + size_; + const auto size = size_; + grow(size + 1); + auto p = ptr() + size; new (p) T(std::forward(args)...); ++size_; return *p; @@ -498,19 +493,19 @@ class h_vector { protected: pointer ptr() noexcept { return is_hdata() ? reinterpret_cast(hdata_) : e_.data_; } const_pointer ptr() const noexcept { return is_hdata() ? 
reinterpret_cast(hdata_) : e_.data_; } - void destruct() noexcept { + RX_ALWAYS_INLINE void destruct() noexcept { if (is_hdata()) { if constexpr (!std::is_trivially_destructible_v) { - const size_type sz = size_; - for (size_type i = 0; i < sz; ++i) { - reinterpret_cast(hdata_)[i].~T(); + auto beg = reinterpret_cast(hdata_), end = beg + size_; + for (auto ptr = beg; ptr != end; ++ptr) { + ptr->~T(); } } } else { if constexpr (!std::is_trivially_destructible_v) { - const size_type sz = size_; - for (size_type i = 0; i < sz; ++i) { - e_.data_[i].~T(); + auto beg = e_.data_, end = beg + size_; + for (auto ptr = beg; ptr != end; ++ptr) { + ptr->~T(); } } operator delete(e_.data_); diff --git a/cpp_src/estl/intrusive_ptr.h b/cpp_src/estl/intrusive_ptr.h index b45474137..33ec974c8 100644 --- a/cpp_src/estl/intrusive_ptr.h +++ b/cpp_src/estl/intrusive_ptr.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "tools/assertrx.h" namespace reindexer { @@ -15,86 +16,57 @@ class intrusive_ptr { constexpr intrusive_ptr() noexcept = default; constexpr intrusive_ptr(std::nullptr_t) noexcept {} - - intrusive_ptr(T* p, bool add_ref = true) noexcept : px(p) { - if (px != 0 && add_ref) { + intrusive_ptr(T* p) noexcept : px(p) { intrusive_ptr_add_ref(px); } + intrusive_ptr(T* p, bool add_ref) noexcept : px(p) { + if (add_ref) { intrusive_ptr_add_ref(px); } } - template intrusive_ptr(const intrusive_ptr& rhs) noexcept : px(rhs.get()) { - if (px != 0) { - intrusive_ptr_add_ref(px); - } - } - - intrusive_ptr(const intrusive_ptr& rhs) noexcept : px(rhs.px) { - if (px != 0) { - intrusive_ptr_add_ref(px); - } - } - - ~intrusive_ptr() { - if (px != 0) { - intrusive_ptr_release(px); - } + intrusive_ptr_add_ref(px); } + intrusive_ptr(const intrusive_ptr& rhs) noexcept : px(rhs.px) { intrusive_ptr_add_ref(px); } + intrusive_ptr(intrusive_ptr&& rhs) noexcept : px(rhs.px) { rhs.px = 0; } + ~intrusive_ptr() { intrusive_ptr_release(px); } template intrusive_ptr& operator=(const intrusive_ptr& 
rhs) noexcept { this_type(rhs).swap(*this); return *this; } - - intrusive_ptr(intrusive_ptr&& rhs) noexcept : px(rhs.px) { rhs.px = 0; } - - intrusive_ptr& operator=(intrusive_ptr&& rhs) noexcept { - this_type(static_cast(rhs)).swap(*this); - return *this; - } // NOLINTNEXTLINE(bugprone-unhandled-self-assignment) intrusive_ptr& operator=(const intrusive_ptr& rhs) noexcept { this_type(rhs).swap(*this); return *this; } - + intrusive_ptr& operator=(intrusive_ptr&& rhs) noexcept { + this_type(std::move(rhs)).swap(*this); + return *this; + } intrusive_ptr& operator=(T* rhs) noexcept { this_type(rhs).swap(*this); return *this; } void reset() noexcept { this_type().swap(*this); } - void reset(T* rhs) noexcept { this_type(rhs).swap(*this); } - bool unique() const noexcept { - if (px == 0) { - return true; - } - return intrusive_ptr_is_unique(px); - } + bool unique() const noexcept { return intrusive_ptr_is_unique(px); } T* get() const noexcept { return px; } - T& operator*() const noexcept { - assertrx(px != 0); + assertrx_dbg(px != 0); return *px; } - T* operator->() const noexcept { - assertrx(px != 0); + assertrx_dbg(px != 0); return px; } - typedef T* this_type::*unspecified_bool_type; + typedef T* this_type::* unspecified_bool_type; operator unspecified_bool_type() const noexcept { return px == 0 ? 
0 : &this_type::px; } - - void swap(intrusive_ptr& rhs) noexcept { - T* tmp = px; - px = rhs.px; - rhs.px = tmp; - } + void swap(intrusive_ptr& rhs) noexcept { std::swap(px, rhs.px); } private: T* px{nullptr}; @@ -275,7 +247,7 @@ class intrusive_rc_base { friend void intrusive_ptr_add_ref(intrusive_rc_base* x) noexcept; friend void intrusive_ptr_release(intrusive_rc_base* x) noexcept; - friend bool intrusive_ptr_is_unique(intrusive_rc_base* x) noexcept; + friend bool intrusive_ptr_is_unique(const intrusive_rc_base* x) noexcept; }; inline void intrusive_ptr_add_ref(intrusive_rc_base* x) noexcept { @@ -290,7 +262,7 @@ inline void intrusive_ptr_release(intrusive_rc_base* x) noexcept { } } -inline bool intrusive_ptr_is_unique(intrusive_rc_base* x) noexcept { return !x || (x->refcount == 1); } +inline bool intrusive_ptr_is_unique(const intrusive_rc_base* x) noexcept { return !x || (x->refcount == 1); } template intrusive_ptr make_intrusive(Args&&... args) { diff --git a/cpp_src/estl/suffix_map.h b/cpp_src/estl/suffix_map.h index d303b6b4e..7d25103ca 100644 --- a/cpp_src/estl/suffix_map.h +++ b/cpp_src/estl/suffix_map.h @@ -26,7 +26,7 @@ class suffix_map { iterator(const iterator& other) noexcept : idx_(other.idx_), m_(other.m_) {} // NOLINTNEXTLINE(bugprone-unhandled-self-assignment) iterator& operator=(const iterator& other) noexcept { - idx_ = other.idx; + idx_ = other.idx_; m_ = other.m_; return *this; } @@ -35,7 +35,7 @@ class suffix_map { return value_type(std::make_pair(p, m_->mapped_[m_->sa_[idx_]])); } - const value_type operator->() const { + value_type operator->() const { auto* p = &m_->text_[m_->sa_[idx_]]; return value_type(std::make_pair(p, m_->mapped_[m_->sa_[idx_]])); } @@ -136,25 +136,21 @@ class suffix_map { return end(); } - int insert(std::string_view word, const V& val, int virtual_len = -1) { - if (virtual_len == -1) { - virtual_len = word.length(); - } + int insert(std::string_view word, const V& val) { int wpos = text_.size(); size_t real_len = 
word.length(); text_.insert(text_.end(), word.begin(), word.end()); text_.emplace_back('\0'); mapped_.insert(mapped_.end(), real_len + 1, val); words_.emplace_back(wpos); - words_len_.emplace_back(real_len, virtual_len); + words_len_.emplace_back(real_len); built_ = false; return wpos; } const CharT* word_at(int idx) const noexcept { return &text_[words_[idx]]; } - int16_t word_len_at(int idx) const noexcept { return words_len_[idx].first; } - int16_t virtual_word_len(int idx) const noexcept { return words_len_[idx].second; } + int16_t word_len_at(int idx) const noexcept { return words_len_[idx]; } void build() { if (built_) { @@ -220,7 +216,7 @@ class suffix_map { std::vector sa_, words_; std::vector lcp_; - std::vector> words_len_; + std::vector words_len_; std::vector mapped_; std::vector text_; bool built_ = false; diff --git a/cpp_src/estl/tokenizer.h b/cpp_src/estl/tokenizer.h index 1304e7268..1e9602388 100644 --- a/cpp_src/estl/tokenizer.h +++ b/cpp_src/estl/tokenizer.h @@ -1,8 +1,8 @@ #pragma once #include +#include "core/ft/usingcontainer.h" #include "core/keyvalue/variant.h" -#include "estl/h_vector.h" namespace reindexer { @@ -17,14 +17,9 @@ class token { token& operator=(token&&) noexcept = default; [[nodiscard]] RX_ALWAYS_INLINE std::string_view text() const noexcept { return std::string_view(text_.data(), text_.size()); } - void to_lower() noexcept { - for (auto& c : text_) { - c = tolower(c); - } - } token_type type = TokenSymbol; - h_vector text_; + RVector text_; }; class tokenizer { diff --git a/cpp_src/gtests/tests/API/api.cc b/cpp_src/gtests/tests/API/api.cc index 517955efd..683690eae 100644 --- a/cpp_src/gtests/tests/API/api.cc +++ b/cpp_src/gtests/tests/API/api.cc @@ -1,5 +1,6 @@ #include +#include "debug/backtrace.h" #include "gtest/gtest.h" #include "tools/assertrx.h" #include "tools/fsops.h" diff --git a/cpp_src/gtests/tests/API/base_tests.cc b/cpp_src/gtests/tests/API/base_tests.cc index e8ee49e98..011ccaafa 100644 --- 
a/cpp_src/gtests/tests/API/base_tests.cc +++ b/cpp_src/gtests/tests/API/base_tests.cc @@ -1173,9 +1173,11 @@ TEST_F(ReindexerApi, SortByUnorderedIndexWithJoins) { } static void TestDSLParseCorrectness(const std::string& testDsl) { - Query query; - Error err = query.FromJSON(testDsl); - EXPECT_TRUE(err.ok()) << err.what(); + try { + Query query = query.FromJSON(testDsl); + } catch (Error& err) { + EXPECT_TRUE(err.ok()) << err.what(); + } } TEST_F(ReindexerApi, DslFieldsTest) { @@ -1373,14 +1375,14 @@ TEST_F(ReindexerApi, DistinctQueriesEncodingTest) { std::string dsl = q1.GetJSON(); Query q2; - const auto err = q2.FromJSON(dsl); - ASSERT_TRUE(err.ok()) << err.what(); + EXPECT_NO_THROW(q2 = Query::FromJSON(dsl)); EXPECT_EQ(q1, q2) << "q1: " << q1.GetSQL() << "\nq2: " << q2.GetSQL(); Query q3{Query(default_namespace).Distinct("name").Distinct("city").Where("id", CondGt, static_cast(10))}; std::string sql2 = q3.GetSQL(); - Query q4 = Query::FromSQL(sql2); + Query q4; + EXPECT_NO_THROW(q4 = Query::FromSQL(sql2)); EXPECT_EQ(q3, q4) << "q3: " << q3.GetSQL() << "\nq4: " << q4.GetSQL(); EXPECT_EQ(sql2, q4.GetSQL()); } @@ -1808,8 +1810,7 @@ TEST_F(ReindexerApi, SelectFilterWithAggregationConstraints) { std::string sql = "select id, distinct(year) from test_namespace"; EXPECT_NO_THROW(q = Query::FromSQL(sql)); - Error status = Query().FromJSON(q.GetJSON()); - EXPECT_TRUE(status.ok()) << status.what(); + EXPECT_NO_THROW(Query::FromJSON(q.GetJSON())); q = Query().Select({"id"}); EXPECT_NO_THROW(q.Aggregate(AggDistinct, {"year"}, {})); @@ -1818,16 +1819,21 @@ TEST_F(ReindexerApi, SelectFilterWithAggregationConstraints) { EXPECT_THROW(q = Query::FromSQL(sql), Error); q = Query(default_namespace).Select({"id"}); q.aggregations_.emplace_back(reindexer::AggregateEntry{AggMax, {"year"}}); - status = Query().FromJSON(q.GetJSON()); - EXPECT_FALSE(status.ok()); - EXPECT_TRUE(status.what() == std::string(reindexer::kAggregationWithSelectFieldsMsgError)); + try { + 
Query::FromJSON(q.GetJSON()); + } catch (Error& err) { + EXPECT_FALSE(err.ok()); + EXPECT_EQ(err.what(), reindexer::kAggregationWithSelectFieldsMsgError); + } + EXPECT_THROW(q.Aggregate(AggMax, {"price"}, {}), Error); sql = "select facet(year), id, name from test_namespace"; EXPECT_THROW(q = Query::FromSQL(sql), Error); q = Query(default_namespace).Select({"id", "name"}); EXPECT_THROW(q.Aggregate(AggFacet, {"year"}, {}), Error); - status = Query().FromJSON(fmt::sprintf(R"({"namespace":"%s", + try { + Query::FromJSON(fmt::sprintf(R"({"namespace":"%s", "limit":-1, "offset":0, "req_total":"disabled", @@ -1849,9 +1855,11 @@ TEST_F(ReindexerApi, SelectFilterWithAggregationConstraints) { "fields":["year"] } ]})", - default_namespace)); - EXPECT_FALSE(status.ok()); - EXPECT_TRUE(status.what() == std::string(reindexer::kAggregationWithSelectFieldsMsgError)); + default_namespace)); + } catch (Error& err) { + EXPECT_FALSE(err.ok()); + EXPECT_EQ(err.what(), reindexer::kAggregationWithSelectFieldsMsgError); + } EXPECT_THROW((void)Query::FromSQL("select max(id), * from test_namespace"), Error); EXPECT_THROW((void)Query::FromSQL("select *, max(id) from test_namespace"), Error); diff --git a/cpp_src/gtests/tests/CMakeLists.txt b/cpp_src/gtests/tests/CMakeLists.txt index b6f9ae110..c6f658c8f 100644 --- a/cpp_src/gtests/tests/CMakeLists.txt +++ b/cpp_src/gtests/tests/CMakeLists.txt @@ -8,6 +8,7 @@ list(APPEND REINDEXER_LIBRARIES reindexer_server_library reindexer ${REINDEXER_L option(XML_REPORT "Enable XML xUnit report" OFF) option(GH_CI_OSX "Option for Github CI on OSX" OFF) +option(GTEST_SKIP_SHARDING "Skips Sharding-related tests. 
Option for Github tests with ASAN - free runners run out of memory" OFF) set(GTEST_WORKERS "6" CACHE STRING "Number of workers for gtest_parallel") include_directories(fixtures mocks) @@ -68,15 +69,18 @@ if (WITH_ASAN OR WITH_TSAN OR WITH_STDLIB_DEBUG) set(GTEST_TIMEOUT 2000) endif() +if(GTEST_SKIP_SHARDING) + set(GTEST_EXTRA_OPTIONS "--gtest_filter=-Sharding*:SnapshotTestApi.ForceSyncFromRemoteToLocal") +endif() find_program(GTEST_PARALLEL "gtest-parallel") if (GTEST_PARALLEL) if(XML_REPORT) - add_test(NAME gtests COMMAND gtest-parallel --gtest_color=no --serialize_test_cases --print_test_times --timeout ${GTEST_TIMEOUT} --gtest_output=xml:${REINDEXER_SOURCE_PATH}/testReport.xml "./${TARGET}") + add_test(NAME gtests COMMAND gtest-parallel --gtest_color=no --serialize_test_cases --print_test_times --timeout ${GTEST_TIMEOUT} --gtest_output=xml:${REINDEXER_SOURCE_PATH}/testReport.xml ${GTEST_EXTRA_OPTIONS} "./${TARGET}") else() - add_test(NAME gtests COMMAND gtest-parallel --serialize_test_cases --print_test_times --timeout ${GTEST_TIMEOUT} "./${TARGET}") + add_test(NAME gtests COMMAND gtest-parallel --serialize_test_cases --print_test_times --timeout ${GTEST_TIMEOUT} ${GTEST_EXTRA_OPTIONS} "./${TARGET}") endif() else() - add_test(NAME gtests COMMAND ${TARGET} --gtest_color=yes) + add_test(NAME gtests COMMAND ${TARGET} --gtest_color=yes ${GTEST_EXTRA_OPTIONS}) endif() if (WITH_TSAN) diff --git a/cpp_src/gtests/tests/fixtures/ft_api.cc b/cpp_src/gtests/tests/fixtures/ft_api.cc index b0e264bd9..9f575c556 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.cc +++ b/cpp_src/gtests/tests/fixtures/ft_api.cc @@ -167,10 +167,10 @@ reindexer::Error FTApi::Delete(int id) { return this->rt.reindexer->Delete("nm1", item); } -reindexer::QueryResults FTApi::SimpleCompositeSelect(std::string word) { +reindexer::QueryResults FTApi::SimpleCompositeSelect(std::string_view word) { auto qr{reindexer::Query("nm1").Where("ft3", CondEq, word)}; reindexer::QueryResults res; - auto 
mqr{reindexer::Query("nm2").Where("ft3", CondEq, std::move(word))}; + auto mqr{reindexer::Query("nm2").Where("ft3", CondEq, word)}; mqr.AddFunction("ft1 = snippet(,\"\",3,2,,d)"); qr.Merge(std::move(mqr)); @@ -181,11 +181,11 @@ reindexer::QueryResults FTApi::SimpleCompositeSelect(std::string word) { return res; } -reindexer::QueryResults FTApi::CompositeSelectField(const std::string& field, std::string word) { - word = '@' + field + ' ' + word; - auto qr{reindexer::Query("nm1").Where("ft3", CondEq, word)}; +reindexer::QueryResults FTApi::CompositeSelectField(const std::string& field, std::string_view word) { + const auto query = fmt::format("@{} {}", field, word); + auto qr{reindexer::Query("nm1").Where("ft3", CondEq, query)}; reindexer::QueryResults res; - auto mqr{reindexer::Query("nm2").Where("ft3", CondEq, std::move(word))}; + auto mqr{reindexer::Query("nm2").Where("ft3", CondEq, query)}; mqr.AddFunction(field + " = snippet(,\"\",3,2,,d)"); qr.Merge(std::move(mqr)); @@ -196,8 +196,8 @@ reindexer::QueryResults FTApi::CompositeSelectField(const std::string& field, st return res; } -reindexer::QueryResults FTApi::StressSelect(std::string word) { - const auto qr{reindexer::Query("nm1").Where("ft3", CondEq, std::move(word))}; +reindexer::QueryResults FTApi::StressSelect(std::string_view word) { + const auto qr{reindexer::Query("nm1").Where("ft3", CondEq, word)}; reindexer::QueryResults res; auto err = rt.reindexer->Select(qr, res); EXPECT_TRUE(err.ok()) << err.what(); diff --git a/cpp_src/gtests/tests/fixtures/ft_api.h b/cpp_src/gtests/tests/fixtures/ft_api.h index b9c4655f3..f91d9a577 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.h +++ b/cpp_src/gtests/tests/fixtures/ft_api.h @@ -35,9 +35,9 @@ class FTApi : public ::testing::TestWithParam CreateAllPermutatedQueries(const std::string& queryStart, std::vector words, const std::string& queryEnd, const std::string& sep = " "); void CheckAllPermutations(const std::string& queryStart, const std::vector& words, const 
std::string& queryEnd, diff --git a/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc b/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc index fc429300e..a68d0e2fa 100644 --- a/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc +++ b/cpp_src/gtests/tests/fixtures/fuzzing/random_generator.cc @@ -391,7 +391,7 @@ std::vector RandomGenerator::RndFieldsForCompositeIndex(const std::vecto const size_t count = compositeIndexSize(scalarIndexes.size()); result.reserve(count); const bool uniqueFields = count <= scalarIndexes.size() && !RndErr(); - // TODO unexisted and not indexed fields + // TODO non-existent and not indexed fields if (uniqueFields) { auto scalars = scalarIndexes; while (result.size() < count) { diff --git a/cpp_src/gtests/tests/fixtures/grpcclient_api.h b/cpp_src/gtests/tests/fixtures/grpcclient_api.h index f71b98d73..c7c725b35 100644 --- a/cpp_src/gtests/tests/fixtures/grpcclient_api.h +++ b/cpp_src/gtests/tests/fixtures/grpcclient_api.h @@ -264,7 +264,7 @@ class GrpcClientApi : public ReindexerApi { reindexer::WrSerializer wrser; reindexer::Serializer rdser(cjson); - std::deque storage; + reindexer::h_vector storage; reindexer::CJsonDecoder decoder(const_cast(nsTypes.first), storage); ASSERT_NO_THROW(decoder.Decode<>(pl, rdser, wrser)); ASSERT_TRUE(rdser.Eof()); diff --git a/cpp_src/gtests/tests/fixtures/queries_api.cc b/cpp_src/gtests/tests/fixtures/queries_api.cc index 03fa745a8..e97cfbb3f 100644 --- a/cpp_src/gtests/tests/fixtures/queries_api.cc +++ b/cpp_src/gtests/tests/fixtures/queries_api.cc @@ -612,11 +612,10 @@ void QueriesApi::CheckSqlQueries() { void QueriesApi::checkDslQuery(std::string_view dslQuery, Query&& checkQuery) { Query parsedQuery; - Error err = parsedQuery.FromJSON(dslQuery); - ASSERT_TRUE(err.ok()) << "Query: " << dslQuery << "; err: " << err.what(); + ASSERT_NO_THROW(parsedQuery = Query::FromJSON(dslQuery)); QueryResults dslQr; - err = rt.reindexer->Select(parsedQuery, dslQr); + auto err = 
rt.reindexer->Select(parsedQuery, dslQr); ASSERT_TRUE(err.ok()) << "Query: " << dslQuery << "; err: " << err.what(); QueryResults checkQr; diff --git a/cpp_src/gtests/tests/fixtures/queries_api.h b/cpp_src/gtests/tests/fixtures/queries_api.h index a5a3d6de5..d224c7a10 100644 --- a/cpp_src/gtests/tests/fixtures/queries_api.h +++ b/cpp_src/gtests/tests/fixtures/queries_api.h @@ -1103,8 +1103,10 @@ class QueriesApi : public ReindexerApi, public QueriesVerifier { ExecuteAndVerify(Query(compositeIndexesNs) .WhereComposite(kCompositeFieldTitleName.c_str(), CondLe, {{Variant(std::string(titleValue)), Variant(std::string(nameValue))}})); + constexpr size_t kStringKeysCnt = 1010; std::vector stringKeys; - for (size_t i = 0; i < 1010; ++i) { + stringKeys.reserve(kStringKeysCnt); + for (size_t i = 0; i < kStringKeysCnt; ++i) { stringKeys.emplace_back(VariantArray{Variant(RandString()), Variant(RandString())}); } ExecuteAndVerify(Query(compositeIndexesNs).WhereComposite(kCompositeFieldTitleName.c_str(), CondSet, stringKeys)); diff --git a/cpp_src/gtests/tests/fixtures/queries_verifier.h b/cpp_src/gtests/tests/fixtures/queries_verifier.h index 608349a42..258a51d5e 100644 --- a/cpp_src/gtests/tests/fixtures/queries_verifier.h +++ b/cpp_src/gtests/tests/fixtures/queries_verifier.h @@ -2,7 +2,8 @@ #include -#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#if defined(__GNUC__) && ((__GNUC__ == 12) || (__GNUC__ == 13)) && defined(REINDEX_WITH_ASAN) +// regex header is broken in GCC 12.0-13.3 with ASAN #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include diff --git a/cpp_src/gtests/tests/fixtures/reindexertestapi.cc b/cpp_src/gtests/tests/fixtures/reindexertestapi.cc index 3d7317c52..a0fed480c 100644 --- a/cpp_src/gtests/tests/fixtures/reindexertestapi.cc +++ b/cpp_src/gtests/tests/fixtures/reindexertestapi.cc @@ -277,5 +277,20 @@ std::vector ReindexerTestApi::RandIntVector(size_t size, int start, int return vec; } 
+template +std::vector ReindexerTestApi::GetSerializedQrItems(reindexer::QueryResults& qr) { + std::vector items; + items.reserve(qr.Count()); + reindexer::WrSerializer wrser; + for (auto it : qr) { + EXPECT_TRUE(it.Status().ok()) << it.Status().what(); + wrser.Reset(); + auto err = it.GetJSON(wrser, false); + EXPECT_TRUE(err.ok()) << err.what(); + items.emplace_back(wrser.Slice()); + } + return items; +} + template class ReindexerTestApi; template class ReindexerTestApi; diff --git a/cpp_src/gtests/tests/fixtures/reindexertestapi.h b/cpp_src/gtests/tests/fixtures/reindexertestapi.h index ab02a1c0f..dbe0940d0 100644 --- a/cpp_src/gtests/tests/fixtures/reindexertestapi.h +++ b/cpp_src/gtests/tests/fixtures/reindexertestapi.h @@ -94,6 +94,8 @@ class ReindexerTestApi { void SetVerbose(bool v) noexcept { verbose = v; } std::shared_ptr reindexer; + static std::vector GetSerializedQrItems(reindexer::QueryResults& qr); + private: const std::string letters = "abcdefghijklmnopqrstuvwxyz"; const std::wstring ru_letters = L"абвгдеёжзийклмнопрстуфхцчшщъыьэюя"; diff --git a/cpp_src/gtests/tests/fixtures/systemhelpers.cc b/cpp_src/gtests/tests/fixtures/systemhelpers.cc index 0a427745e..3aaf24051 100644 --- a/cpp_src/gtests/tests/fixtures/systemhelpers.cc +++ b/cpp_src/gtests/tests/fixtures/systemhelpers.cc @@ -23,6 +23,7 @@ static const std::thread::id kMainThreadID = std::this_thread::get_id(); pid_t StartProcess(const std::string& program, const std::vector& params) { #ifdef __linux__ std::vector paramsPointers; + paramsPointers.reserve(params.size()); for (size_t i = 0; i < params.size(); i++) { paramsPointers.push_back(const_cast(params[i].c_str())); } diff --git a/cpp_src/gtests/tests/unit/cascade_replication_test.cc b/cpp_src/gtests/tests/unit/cascade_replication_test.cc index cfdb88753..0125b783e 100644 --- a/cpp_src/gtests/tests/unit/cascade_replication_test.cc +++ b/cpp_src/gtests/tests/unit/cascade_replication_test.cc @@ -255,7 +255,11 @@ TEST_F(CascadeReplicationApi, 
MasterSlaveSlaveReload) { auto leader = cluster.Get(0); TestNamespace1 ns1(leader); const int startId = 1000; +#ifdef REINDEX_WITH_ASAN + const int n2 = 4000; +#else // REINDEX_WITH_ASAN const int n2 = 20000; +#endif // REINDEX_WITH_ASAN auto AddThread = [&leader, &ns1]() { ns1.AddRows(leader, startId, n2); }; @@ -567,6 +571,7 @@ TEST_F(CascadeReplicationApi, NodeWithMasterAndSlaveNs1) { } { std::vector results_data; + results_data.reserve(2 * n); for (unsigned int i = 0; i < n; i++) { results_data.push_back(c1 + i); } @@ -1006,8 +1011,13 @@ TEST_F(CascadeReplicationApi, ConcurrentForceSync) { } // Fill leader's data +#ifdef REINDEX_WITH_ASAN + const size_t kRows = 2000; + const size_t kDataBytes = 100; +#else // REINDEX_WITH_ASAN const size_t kRows = 10000; const size_t kDataBytes = 1000; +#endif // REINDEX_WITH_ASAN std::vector testNsList; for (auto& ns : kNsList) { testNsList.emplace_back(nodes[0].Get(), ns); @@ -1198,3 +1208,43 @@ TEST_F(CascadeReplicationApi, FollowerNetworkAndSyncStatus) { cluster.ShutdownServer(1); AwaitFollowersState(cluster.Get(0), cluster::NodeStats::Status::Offline, cluster::NodeStats::SyncState::AwaitingResync); } + +TEST_F(CascadeReplicationApi, ManyLeadersOneFollowerTest) { + const int kLeadersCount = 5; + const int kBasePort = 7770; + const std::string kBaseDbPath(fs::JoinPath(kBaseTestsetDbPath, "ManyLeadersOneFollowerTest/node_")); + + std::vector leaders; + leaders.reserve(kLeadersCount); + ServerControl follower; + + std::vector nss; + nss.reserve(kLeadersCount); + + follower.InitServer(ServerControlConfig(kLeadersCount, kBasePort + kLeadersCount, kBasePort + 1000 + kLeadersCount, + kBaseDbPath + std::to_string(kLeadersCount), "db")); + follower.Get()->MakeFollower(); + + for (int serverId = 0; serverId < kLeadersCount; ++serverId) { + leaders.emplace_back().InitServer( + ServerControlConfig(serverId, kBasePort + serverId, kBasePort + 1000 + serverId, kBaseDbPath + std::to_string(serverId), "db")); + + 
nss.emplace_back(leaders.back().Get(), "ns_" + std::to_string(serverId)); + nss.back().AddRows(leaders.back().Get(), 0, 10); + + leaders.back().Get()->AddFollower(MakeDsn(reindexer_server::UserRole::kRoleReplication, follower.Get()), + std::vector{nss.back().nsName_}); + } + + auto sync = [&] { + for (int serverId = 0; serverId < kLeadersCount; ++serverId) { + WaitSync(leaders[serverId].Get(), follower.Get(), nss[serverId].nsName_); + } + }; + + sync(); + for (int serverId = 0; serverId < kLeadersCount; ++serverId) { + nss[serverId].AddRows(leaders[serverId].Get(), 10, 20); + } + sync(); +} diff --git a/cpp_src/gtests/tests/unit/clusterproxy_test.cc b/cpp_src/gtests/tests/unit/clusterproxy_test.cc index 1a8fe74d6..ea27d09d5 100644 --- a/cpp_src/gtests/tests/unit/clusterproxy_test.cc +++ b/cpp_src/gtests/tests/unit/clusterproxy_test.cc @@ -1121,6 +1121,7 @@ TEST_F(ClusterizationProxyApi, Shutdown) { std::this_thread::sleep_for(kSleepTime); } }; + threads.reserve(kClusterSize); for (size_t i = 0; i < kClusterSize; ++i) { threads.emplace_back(addItemFn, i % kClusterSize, kNsName); } diff --git a/cpp_src/gtests/tests/unit/composite_indexes_test.cc b/cpp_src/gtests/tests/unit/composite_indexes_test.cc index a43555b73..74f46f385 100644 --- a/cpp_src/gtests/tests/unit/composite_indexes_test.cc +++ b/cpp_src/gtests/tests/unit/composite_indexes_test.cc @@ -1,4 +1,8 @@ #include "composite_indexes_api.h" +#include "gmock/gmock.h" +#include "yaml-cpp/node/node.h" +#include "yaml-cpp/node/parse.h" +#include "yaml-cpp/yaml.h" using QueryResults = ReindexerApi::QueryResults; using Item = ReindexerApi::Item; @@ -102,8 +106,8 @@ TEST_F(CompositeIndexesApi, CompositeIndexesDropTest) { TEST_F(CompositeIndexesApi, CompositeIndexesSelectTest) { int priceValue = 77777, pagesValue = 88888; - const char* titleValue = "test book1 title"; - const char* nameValue = "test book1 name"; + constexpr std::string_view titleValue = "test book1 title"; + constexpr std::string_view nameValue = "test 
book1 name"; addCompositeIndex({kFieldNameBookid, kFieldNameBookid2}, CompositeIndexHash, IndexOpts().PK()); fillNamespace(0, 100); @@ -111,7 +115,7 @@ TEST_F(CompositeIndexesApi, CompositeIndexesSelectTest) { std::string compositeIndexName(getCompositeIndexName({kFieldNamePrice, kFieldNamePages})); addCompositeIndex({kFieldNamePrice, kFieldNamePages}, CompositeIndexHash, IndexOpts()); - addOneRow(300, 3000, titleValue, pagesValue, priceValue, nameValue); + addOneRow(300, 3000, std::string(titleValue), pagesValue, priceValue, std::string(nameValue)); fillNamespace(101, 200); auto qr = execAndCompareQuery( @@ -127,8 +131,8 @@ TEST_F(CompositeIndexesApi, CompositeIndexesSelectTest) { Item titleNameRow = qr.begin().GetItem(false); Variant selectedTitle = titleNameRow[kFieldNameTitle]; Variant selectedName = titleNameRow[kFieldNameName]; - EXPECT_EQ(static_cast(selectedTitle)->compare(std::string(titleValue)), 0); - EXPECT_EQ(static_cast(selectedName)->compare(std::string(nameValue)), 0); + EXPECT_EQ(static_cast(selectedTitle), titleValue); + EXPECT_EQ(static_cast(selectedName), nameValue); execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName, CondLt, {{Variant(priceValue), Variant(pagesValue)}})); execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName, CondLe, {{Variant(priceValue), Variant(pagesValue)}})); @@ -146,7 +150,7 @@ TEST_F(CompositeIndexesApi, CompositeIndexesSelectTest) { for (int i = 0; i < 10; ++i) { intKeys.emplace_back(VariantArray{Variant(i), Variant(i * 5)}); } - execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName.c_str(), CondSet, intKeys)); + execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName, CondSet, intKeys)); dropIndex(compositeIndexName); fillNamespace(401, 500); @@ -156,30 +160,23 @@ TEST_F(CompositeIndexesApi, CompositeIndexesSelectTest) { fillNamespace(701, 900); - execAndCompareQuery( - Query(default_namespace) - 
.WhereComposite(compositeIndexName2.c_str(), CondEq, {{Variant(std::string(titleValue)), Variant(std::string(nameValue))}})); - execAndCompareQuery( - Query(default_namespace) - .WhereComposite(compositeIndexName2.c_str(), CondGe, {{Variant(std::string(titleValue)), Variant(std::string(nameValue))}})); - execAndCompareQuery( - Query(default_namespace) - .WhereComposite(compositeIndexName2.c_str(), CondLt, {{Variant(std::string(titleValue)), Variant(std::string(nameValue))}})); - execAndCompareQuery( - Query(default_namespace) - .WhereComposite(compositeIndexName2.c_str(), CondLe, {{Variant(std::string(titleValue)), Variant(std::string(nameValue))}})); + execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName2, CondEq, {{Variant(titleValue), Variant(nameValue)}})); + execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName2, CondGe, {{Variant(titleValue), Variant(nameValue)}})); + execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName2, CondLt, {{Variant(titleValue), Variant(nameValue)}})); + execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName2, CondLe, {{Variant(titleValue), Variant(nameValue)}})); fillNamespace(1201, 2000); + constexpr size_t kStringKeysCnt = 1010; std::vector stringKeys; - for (size_t i = 0; i < 1010; ++i) { + stringKeys.reserve(kStringKeysCnt); + for (size_t i = 0; i < kStringKeysCnt; ++i) { stringKeys.emplace_back(VariantArray{Variant(RandString()), Variant(RandString())}); } - execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName2.c_str(), CondSet, stringKeys)); - execAndCompareQuery( - Query(default_namespace) - .Where(kFieldNameName, CondEq, nameValue) - .WhereComposite(compositeIndexName2.c_str(), CondEq, {{Variant(std::string(titleValue)), Variant(std::string(nameValue))}})); + execAndCompareQuery(Query(default_namespace).WhereComposite(compositeIndexName2, CondSet, stringKeys)); + 
execAndCompareQuery(Query(default_namespace) + .Where(kFieldNameName, CondEq, nameValue) + .WhereComposite(compositeIndexName2, CondEq, {{Variant(titleValue), Variant(nameValue)}})); dropIndex(compositeIndexName2); fillNamespace(201, 300); @@ -282,3 +279,76 @@ TEST_F(CompositeIndexesApi, CompositeOverCompositeTest) { EXPECT_EQ(err.what(), fmt::sprintf(kExpectedErrorPattern, getCompositeIndexName({kComposite1, kComposite2}), kComposite1)); addData(); } + +TEST_F(CompositeIndexesApi, FastUpdateIndex) { + const std::vector kIndexTypes{"-", "hash", "tree"}; + const std::vector kIndexNames{"IntIndex", "Int64Index", "DoubleIndex", "StringIndex"}; + const std::vector kFieldTypes{"int", "int64", "double", "string"}; + + auto indexDef = [](const std::string& idxName, const std::string& fieldType, const std::string& type) { + return reindexer::IndexDef{idxName, {idxName}, type, fieldType, IndexOpts()}; + }; + + auto err = rt.reindexer->AddIndex(default_namespace, reindexer::IndexDef{"id", {"id"}, "hash", "int", IndexOpts().PK()}); + ASSERT_TRUE(err.ok()) << err.what(); + + for (size_t i = 0; i < kIndexNames.size(); ++i) { + err = rt.reindexer->AddIndex(default_namespace, indexDef(kIndexNames[i], kFieldTypes[i], kIndexTypes[2])); + ASSERT_TRUE(err.ok()) << err.what(); + } + + auto compParts = {kIndexNames[0], kIndexNames[1], kIndexNames[2], kIndexNames[3]}; + + addCompositeIndex(compParts, CompositeIndexHash, IndexOpts()); + + for (int i = 0; i < 100; ++i) { + Item item = NewItem(default_namespace); + item["id"] = i; + item[kIndexNames[0]] = i % 10 == 0 ? 0 : rand(); + item[kIndexNames[1]] = i % 10 == 0 ? 1 : rand(); + item[kIndexNames[2]] = i % 10 == 0 ? 2.0 : (rand() / 100.0); + item[kIndexNames[3]] = i % 10 == 0 ? 
"string" : RandString(); + Upsert(default_namespace, item); + }; + + auto query = Query(default_namespace) + .Explain() + .WhereComposite(getCompositeIndexName(compParts), CondEq, {{Variant{0}, Variant{1}, Variant{2.0}, Variant{"string"}}}); + + auto qrCheck = rt.Select(query); + auto checkItems = rt.GetSerializedQrItems(qrCheck); + auto checkCount = qrCheck.Count(); + for (size_t i = 0; i < kIndexNames.size(); ++i) { + for (size_t j = 0; j < kIndexTypes.size(); ++j) { + if (kFieldTypes[i] == "double" && kIndexTypes[j] == "hash") { + continue; + } + auto err = rt.reindexer->UpdateIndex(default_namespace, indexDef(kIndexNames[i], kFieldTypes[i], kIndexTypes[j])); + ASSERT_TRUE(err.ok()) << err.what(); + + auto qr = rt.Select(query); + + ASSERT_EQ(rt.GetSerializedQrItems(qr), checkItems); + ASSERT_EQ(qr.Count(), checkCount); + + YAML::Node root = YAML::Load(qr.GetExplainResults()); + auto selectors = root["selectors"]; + ASSERT_TRUE(selectors.IsSequence()) << qr.GetExplainResults(); + ASSERT_EQ(selectors.size(), 1) << qr.GetExplainResults(); + ASSERT_EQ(selectors[0]["field"].as(), getCompositeIndexName(compParts)) << qr.GetExplainResults(); + } + } + + for (size_t i = 0; i < kFieldTypes.size(); ++i) { + for (size_t j = 0; j < kFieldTypes.size(); ++j) { + if (i == j) { + continue; + } + auto err = rt.reindexer->UpdateIndex(default_namespace, indexDef(kIndexNames[i], kFieldTypes[j], "tree")); + ASSERT_FALSE(err.ok()) << err.what(); + auto err1Text = fmt::format("Cannot remove index {} : it's a part of a composite index .*", kIndexNames[i]); + auto err2Text = fmt::format("Cannot convert key from type {} to {}", kFieldTypes[i], kFieldTypes[j]); + ASSERT_THAT(err.what(), testing::MatchesRegex(fmt::format("({}|{})", err1Text, err2Text))); + } + } +} diff --git a/cpp_src/gtests/tests/unit/dsl_parser_test.cc b/cpp_src/gtests/tests/unit/dsl_parser_test.cc index 8ca312173..9450a404e 100644 --- a/cpp_src/gtests/tests/unit/dsl_parser_test.cc +++ 
b/cpp_src/gtests/tests/unit/dsl_parser_test.cc @@ -3,8 +3,7 @@ static void checkQueryDslParse(const reindexer::Query& q) { const std::string dsl = q.GetJSON(); Query parsedQuery; - Error err = parsedQuery.FromJSON(dsl); - ASSERT_TRUE(err.ok()) << err.what() << "\nDSL:\n" << dsl; + ASSERT_NO_THROW(parsedQuery = Query::FromJSON(dsl)); ASSERT_EQ(q, parsedQuery) << "DSL:\n" << dsl << "\nOriginal query:\n" << q.GetSQL() << "\nParsed query:\n" << parsedQuery.GetSQL(); } diff --git a/cpp_src/gtests/tests/unit/equalposition_tests.cc b/cpp_src/gtests/tests/unit/equalposition_tests.cc index a0f735ef8..c13eb20c8 100644 --- a/cpp_src/gtests/tests/unit/equalposition_tests.cc +++ b/cpp_src/gtests/tests/unit/equalposition_tests.cc @@ -315,3 +315,41 @@ TEST_F(EqualPositionApi, SelectBrackets) { EXPECT_TRUE(err.ok()) << err.what(); VerifyQueryResult(qr, {kFieldA1, kFieldA2, kFieldA3}, {key1, key2, key3}, {CondEq, CondEq, CondEq}); } + +TEST_F(EqualPositionApi, EqualPositionBrackets) { + const std::string_view ns{"ns2"}; + rt.OpenNamespace(ns, StorageOpts().Enabled(false)); + rt.AddIndex(ns, {"id", "hash", "int", IndexOpts().PK()}); + rt.UpsertJSON(ns, R"#({"id": 0, "id1":11, "id2":21 "a1": [10, 20, 30], "a2": [20, 30, 40]}})#"); + rt.UpsertJSON(ns, R"#({"id": 1, "id1":11, "id2":21 "a1": [20, 10, 30], "a2": [10, 30, 40]}})#"); + rt.UpsertJSON(ns, R"#({"id": 2, "id1":11, "id2":21 "a1": [30, 10, 30], "a2": [30, 60, 40]}})#"); + + auto check = [this](std::string_view sql, std::string_view resJson) { + try { + auto qr = rt.Select(Query::FromSQL(sql)); + auto jsonVec = rt.GetSerializedQrItems(qr); + ASSERT_EQ(jsonVec.size(), 1); + ASSERT_EQ(jsonVec[0], resJson); + } catch (const Error& e) { + ASSERT_TRUE(false) << e.what(); + } + }; + check("SELECT id FROM ns2 WHERE a1=10 AND a2=20 equal_position(a1, a2)", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 AND a2=20 equal_position(a1, a2) equal_position(a1, a2)", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 AND a2=20 
equal_position(a1, a2) equal_position(a1, a2) equal_position(a1, a2)", R"#({"id":0})#"); + + check("SELECT id FROM ns2 WHERE equal_position(a1, a2) a1=10 AND a2=30", R"#({"id":1})#"); + check("SELECT id FROM ns2 WHERE equal_position(a1, a2) equal_position(a1, a2) a1=10 AND a2=30", R"#({"id":1})#"); + check("SELECT id FROM ns2 WHERE equal_position(a1, a2) equal_position(a1, a2) equal_position(a1, a2) a1=10 AND a2=30", R"#({"id":1})#"); + + check("SELECT id FROM ns2 WHERE a1=10 equal_position(a1, a2) AND a2=20", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 equal_position(a1, a2) equal_position(a1, a2) AND a2=20", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 equal_position(a1, a2) equal_position(a1, a2) equal_position(a1, a2) AND a2=20", R"#({"id":0})#"); + + check("SELECT id FROM ns2 WHERE a1=10 AND equal_position(a1, a2) a2=20", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 AND equal_position(a1, a2) equal_position(a1, a2) a2=20", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 AND equal_position(a1, a2) equal_position(a1, a2) equal_position(a1, a2) a2=20", R"#({"id":0})#"); + + check("SELECT id FROM ns2 WHERE a1=10 AND a2=20 AND (id1=11 or id1=12) equal_position(a1, a2)", R"#({"id":0})#"); + check("SELECT id FROM ns2 WHERE a1=10 AND a2=20 AND equal_position(a1, a2) (id1=11 or id1=12)", R"#({"id":0})#"); +} diff --git a/cpp_src/gtests/tests/unit/ft/ft_generic.cc b/cpp_src/gtests/tests/unit/ft/ft_generic.cc index 702535dd2..ca7f312f7 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_generic.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_generic.cc @@ -140,7 +140,7 @@ TEST_P(FTGenericApi, MergeWithSameNSAndSelectFunctions) { for (const auto& query : CreateAllPermutatedQueries("", {"*entity", "somethin*"}, "")) { for (const auto& field : {std::string("ft1"), std::string("ft2")}) { - auto dsl = std::string("@").append(field).append(" ").append(query); + auto dsl = fmt::format("@{} {}", field, query); auto 
qr{reindexer::Query("nm1").Where("ft3", CondEq, dsl)}; reindexer::QueryResults res; auto mqr{reindexer::Query("nm1").Where("ft3", CondEq, std::move(dsl))}; @@ -835,9 +835,36 @@ TEST_P(FTGenericApi, SelectWithSeveralGroup) { TEST_P(FTGenericApi, NumberToWordsSelect) { Init(GetDefaultConfig()); - Add("оценка 5 майкл джордан 23"sv, ""sv); - - CheckAllPermutations("", {"пять", "+двадцать", "+три"}, "", {{"оценка !5! майкл джордан !23!", ""}}); + auto row1 = Add("оценка 52 майкл джордан 23 пятьдесят"sv); + auto row2 = Add("8"sv); + auto row3 = Add("41 цифра и еще цифра 241"sv); + auto row4 = Add("начало 120 цифра и еще цифра 9120 конец"sv); + auto row5 = Add("слово один пять два 5 семь 7 ещё пять слово"sv); + auto row6 = Add("слово один 5 два пять семь 7 ещё 5 слово"sv); + auto row7 = Add("1000000000000 1000000000 50000000055 1000000"sv); + auto row8 = Add("70 1 7 77 377 70 7"sv); + + auto select = [this](int id, const std::string& ftQuery, const std::string& result) { + auto q{reindexer::Query("nm1").Where("ft3", CondEq, std::string(ftQuery)).And().Where("id", CondEq, id).WithRank()}; + reindexer::QueryResults res; + q.AddFunction("ft3 = highlight(!,!)"); + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 1); + auto item = res.begin().GetItem(); + std::string val = item["ft1"].As(); + ASSERT_EQ(val, result); + }; + select(row1.second, "52 +двадцать +три", "оценка !52! майкл джордан !23! пятьдесят"); + select(row2.second, "восемь", "!8!"); + select(row3.second, "сорок", "!41! цифра и еще цифра !241!"); + select(row3.second, "один", "!41! цифра и еще цифра !241!"); + select(row4.second, "сто конец", "начало !120! цифра и еще цифра !9120 конец!"); + select(row4.second, "тысяч", "начало 120 цифра и еще цифра !9120! конец"); + select(row5.second, "пять", "слово один !пять! два !5! семь 7 ещё !пять! слово"); + select(row6.second, "пять", "слово один !5! два !пять! семь 7 ещё !5! 
слово"); + select(row7.second, "миллиардов", "1000000000000 !1000000000 50000000055! 1000000"); + select(row8.second, "\"=семьдесят =семь\"", "70 1 7 !77 377 70 7!"); } // Make sure FT seeks by a huge number set by string in DSL @@ -853,6 +880,9 @@ TEST_P(FTGenericApi, HugeNumberToWordsSelect) { "+четыреста +сорок"); // Make sure it found this only string ASSERT_TRUE(qr.Count() == 1); + auto item = qr.begin().GetItem(); + std::string json = item["ft1"].As(); + ASSERT_EQ(json, "много !7343121521906522180408440! денег"); } // Make sure way too huge numbers are ignored in FT @@ -1935,7 +1965,7 @@ TEST_P(FTGenericApi, FrisoTestSelect) { "俊逸", "假的", "pnh", "245mm", "哭著", "谷底", "汆", "意表", "liuchiu", "殆", "mhw5500fw"}; for (unsigned int i = 0; i < testData.size(); i++) { - std::string findWord = testData[i]; + const std::string& findWord = testData[i]; if (findWord == "~" || findWord == "*" || findWord == "-" || findWord == "<" || findWord == ">" || findWord == "," || findWord == "」") { continue; diff --git a/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc b/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc index cc095e7d2..fad5d5fcb 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc @@ -173,8 +173,8 @@ class FTIncrementalBuildApi : public FTApi { return query; } - reindexer::QueryResults SimpleSelect(std::string query) { - auto q = reindexer::Query(GetDefaultNamespace()).Where("ft3", CondEq, std::move(query)).WithRank(); + reindexer::QueryResults SimpleSelect(std::string_view query) { + auto q = reindexer::Query(GetDefaultNamespace()).Where("ft3", CondEq, query).WithRank(); reindexer::QueryResults res; auto err = rt.reindexer->Select(q, res); EXPECT_TRUE(err.ok()) << err.what(); diff --git a/cpp_src/gtests/tests/unit/namespace_test.cc b/cpp_src/gtests/tests/unit/namespace_test.cc index 93e7f9032..1735fa2c5 100644 --- a/cpp_src/gtests/tests/unit/namespace_test.cc +++ 
b/cpp_src/gtests/tests/unit/namespace_test.cc @@ -2917,10 +2917,9 @@ TEST_F(NsApi, TestUpdateFieldWithExpressions) { } static void checkQueryDsl(const Query& src) { - Query dst; const std::string dsl = src.GetJSON(); - Error err = dst.FromJSON(dsl); - EXPECT_TRUE(err.ok()) << err.what(); + Query dst; + EXPECT_NO_THROW(dst = Query::FromJSON(dsl)); bool objectValues = false; if (src.UpdateFields().size() > 0) { EXPECT_TRUE(src.UpdateFields().size() == dst.UpdateFields().size()); @@ -2997,6 +2996,18 @@ TEST_F(NsApi, TestModifyQueriesSqlEncoder) { Query q7 = Query::FromSQL(sqlSpeccharsUpdate); EXPECT_EQ(q7.GetSQL(), sqlSpeccharsUpdate); checkQueryDsl(q7); + + { + // Check from #674 + Query q = Query::FromSQL( + "explain select id, name, count(*) from ns where (a = 100 and b = 10 equal_position(a,b)) or (c < 10 and d = 77 " + "equal_position(c,d)) inner join (select * from ns2 where not a == 0) on ns.id == ns2.id order by id limit 100 offset 10"); + q.Merge(Query("ns3")); + q.Merge(Query("ns4")); + Query dst; + EXPECT_NO_THROW(dst = Query::FromJSON(q.GetJSON())); + ASSERT_EQ(q.GetSQL(), dst.GetSQL()); + } } static void generateObject(reindexer::JsonBuilder& builder, const std::string& prefix, ReindexerApi* rtapi) { diff --git a/cpp_src/gtests/tests/unit/queries_test.cc b/cpp_src/gtests/tests/unit/queries_test.cc index 90b40ac82..08c15ccee 100644 --- a/cpp_src/gtests/tests/unit/queries_test.cc +++ b/cpp_src/gtests/tests/unit/queries_test.cc @@ -161,7 +161,9 @@ TEST_F(QueriesApi, TransactionStress) { current_size = 350; uint32_t stepSize = 1000; - for (size_t i = 0; i < 4; i++) { + constexpr size_t kThreads = 4; + pool.reserve(kThreads); + for (size_t i = 0; i < kThreads; i++) { pool.push_back(std::thread([this, i, ¤t_size, stepSize]() { size_t start_pos = i * stepSize; if (i % 2 == 0) { @@ -640,22 +642,18 @@ TEST_F(QueriesApi, DslGenerateParse) { EXPECT_EQ(ser.Slice(), dsl); } if (direction & PARSE) { - Query parsed; try { - const auto err = parsed.FromJSON(dsl); - 
ASSERT_TRUE(err.ok()) << err.what() << "\nDSL: " << dsl; + Query parsed = Query::FromJSON(dsl); + EXPECT_EQ(parsed, q) << dsl; } catch (const Error& err) { ADD_FAILURE() << "Unexpected error: " << err.what() << "\nDSL: " << dsl; continue; } - EXPECT_EQ(parsed, q) << dsl; } } else { const Error& expectedErr = std::get(expected); - Query parsed; try { - const auto err = parsed.FromJSON(dsl); - ASSERT_TRUE(err.ok()) << err.what(); + Query parsed = Query::FromJSON(dsl); ADD_FAILURE() << "Expected error: " << expectedErr.what() << "\nDSL: " << dsl; } catch (const Error& err) { EXPECT_EQ(err.what(), expectedErr.what()) << "\nDSL: " << dsl; @@ -1213,7 +1211,7 @@ std::string print(const reindexer::Query& q, reindexer::QueryResults::Iterator& } void QueriesApi::sortByNsDifferentTypesImpl(std::string_view fillingNs, const reindexer::Query& qTemplate, const std::string& sortPrefix) { - const auto addItem = [&](int id, auto v) { + const auto addItem = [&](int id, const auto& v) { reindexer::WrSerializer ser; { reindexer::JsonBuilder json{ser}; diff --git a/cpp_src/gtests/tests/unit/replication_test.cc b/cpp_src/gtests/tests/unit/replication_test.cc deleted file mode 100644 index e111ff124..000000000 --- a/cpp_src/gtests/tests/unit/replication_test.cc +++ /dev/null @@ -1,702 +0,0 @@ -#include -#include -#include "cluster/stats/replicationstats.h" -#include "replication_load_api.h" -#include "wal/walrecord.h" - -using reindexer::Query; - -// clang-format off -constexpr std::string_view kReplTestSchema1 = R"xxx( - { - "required": [ - "id" - ], - "properties": { - "id": { - "type": "number" - }, - "Field": { - "type": "number" - } - }, - "additionalProperties": false, - "type": "object", - "x-protobuf-ns-number": 99998 - })xxx"; - - -constexpr std::string_view kReplTestSchema2 = R"xxx( - { - "required": [ - "id" - ], - "properties": { - "id": { - "type": "number" - }, - "data": { - "type": "number" - }, - "data123": { - "type": "string" - }, - "f": { - "type": "bolean" - } - }, - 
"additionalProperties": false, - "type": "object", - "x-protobuf-ns-number": 99999 - })xxx"; -// clang-format on - -TEST_F(ReplicationLoadApi, Base) { - // Check replication in multithread mode with data writes and server restarts - std::atomic leaderWasRestarted = false; - const std::string kNsSome = "some"; - const std::string kNsSome1 = "some1"; - InitNs(); - stop = false; - SetWALSize(masterId_, 100000, kNsSome); - WaitSync(kNsSome); - WaitSync(kNsSome1); - - FillData(1000); - - std::thread destroyer([this, &leaderWasRestarted]() { - int count = 0; - while (!stop) { - if (!(count % 30)) { - auto restartId = rand() % kDefaultServerCount; - RestartServer(restartId); - if (restartId == masterId_) { - leaderWasRestarted = true; - } - } - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - }); - - std::thread statsReader([this]() { - while (!stop) { - GetReplicationStats(masterId_); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - }); - - SetWALSize(masterId_, 50000, kNsSome); - for (size_t i = 0; i < 2; ++i) { - if (i % 3 == 0) { - DeleteFromMaster(); - } - SetWALSize(masterId_, (int64_t(i) + 1) * 25000, kNsSome1); - FillData(1000); - GetReplicationStats(masterId_); - SetWALSize(masterId_, (int64_t(i) + 1) * 50000, kNsSome); - SimpleSelect(0); - } - - SetWALSize(masterId_, 50000, "some1"); - - stop = true; - destroyer.join(); - statsReader.join(); - - ForceSync(); - WaitSync(kNsSome); - WaitSync(kNsSome1); - - std::this_thread::sleep_for(std::chrono::seconds(1)); // Add some time for stats stabilization - - // Check final stats - auto stats = GetReplicationStats(masterId_); - EXPECT_EQ(stats.logLevel, LogTrace); - // Validate force/wal syncs - if (leaderWasRestarted) { - EXPECT_GE(stats.forceSyncs.count + stats.walSyncs.count, 2 * (kDefaultServerCount - 1)) - << "Force syncs: " << stats.forceSyncs.count << "; WAL syncs: " << stats.walSyncs.count; - } else { - EXPECT_GE(stats.walSyncs.count, kDefaultServerCount - 1); - 
EXPECT_GT(stats.walSyncs.avgTimeUs, 0); - EXPECT_GT(stats.walSyncs.maxTimeUs, 0); - } - if (stats.forceSyncs.count > 0) { - EXPECT_GT(stats.forceSyncs.avgTimeUs, 0); - EXPECT_GT(stats.forceSyncs.maxTimeUs, 0); - } else { - EXPECT_EQ(stats.forceSyncs.avgTimeUs, 0); - EXPECT_EQ(stats.forceSyncs.maxTimeUs, 0); - } - if (stats.walSyncs.count > 0) { - EXPECT_GT(stats.walSyncs.avgTimeUs, 0); - EXPECT_GT(stats.walSyncs.maxTimeUs, 0); - } else { - EXPECT_EQ(stats.walSyncs.avgTimeUs, 0); - EXPECT_EQ(stats.walSyncs.maxTimeUs, 0); - } - // Validate nodes/ns states - auto replConf = GetSrv(masterId_)->GetServerConfig(ServerControl::ConfigType::Namespace); - ASSERT_EQ(replConf.nodes.size(), stats.nodeStats.size()); - for (auto& nodeStat : stats.nodeStats) { - using namespace reindexer::cluster; - auto dsnIt = std::find_if(replConf.nodes.begin(), replConf.nodes.end(), - [&nodeStat](const AsyncReplicationConfigTest::Node& node) { return nodeStat.dsn == node.dsn; }); - ASSERT_NE(dsnIt, replConf.nodes.end()) << fmt::sprintf("Unexpected dsn value: %s", nodeStat.dsn); - ASSERT_EQ(nodeStat.status, NodeStats::Status::Online); - ASSERT_EQ(nodeStat.syncState, NodeStats::SyncState::OnlineReplication); - ASSERT_EQ(nodeStat.role, RaftInfo::Role::Follower); - ASSERT_TRUE(nodeStat.namespaces.empty()); - } -} - -TEST_F(ReplicationLoadApi, UpdateTimeAfterRestart) { - InitNs(); - - constexpr std::string_view kNs = "some"; - constexpr int kTargetSrv = 1; - SetOptmizationSortWorkers(kTargetSrv, 0, kNs); - - FillData(10); - WaitSync(kNs); - - const auto state0 = GetSrv(kTargetSrv)->GetState(kNs); - EXPECT_GT(state0.updateUnixNano, 0); - - RestartServer(kTargetSrv); - const auto state1 = GetSrv(kTargetSrv)->GetState(kNs); - ASSERT_EQ(state0.updateUnixNano, state1.updateUnixNano); -} - -TEST_F(ReplicationLoadApi, BaseTagsMatcher) { - StopServer(1); - StopServer(2); - - InitNs(); - SetSchema(masterId_, "some1", kReplTestSchema2); - FillData(1000); - for (size_t i = 0; i < 2; ++i) { - if (i == 1) { - 
DeleteFromMaster(); - } - FillData(1000); - } - StartServer(1); - StartServer(2); - - ForceSync(); // restart_replicator call syncDatabase (syncByWal or forceSync) - WaitSync("some"); - auto version = ValidateTagsmatchersVersions("some"); - WaitSync("some1"); - ValidateTagsmatchersVersions("some1"); - SetSchema(masterId_, "some", kReplTestSchema1); - WaitSync("some"); - ValidateTagsmatchersVersions("some", version); - ValidateSchemas("some", kReplTestSchema1); - ValidateSchemas("some1", kReplTestSchema2); -} - -#if !defined(REINDEX_WITH_TSAN) -TEST_F(ReplicationLoadApi, SingleSlaveTest) { - // Check replication in multithread mode with data writes, delete queries and server restarts - InitNs(); - stop = false; - FillData(1000); - - std::thread writingThread([this]() { - while (!stop) { - FillData(1000); - } - }); - - std::thread removingThread([this]() { - size_t counter = 0; - while (!stop) { - std::this_thread::sleep_for(std::chrono::seconds(3)); - int i = rand() % 2; - counter++; - - RestartServer(i); - if (counter % 3 == 0) { - DeleteFromMaster(); - } - } - }); - - for (size_t i = 0; i < 2; ++i) { - SimpleSelect(0); - SetWALSize(masterId_, (int64_t(i) + 1) * 1000, "some1"); - SetWALSize(masterId_, (int64_t(i) + 1) * 1000, "some"); - std::this_thread::sleep_for(std::chrono::seconds(3)); - } - - stop = true; - writingThread.join(); - removingThread.join(); - ForceSync(); - WaitSync("some"); - WaitSync("some1"); -} -#endif - -TEST_F(ReplicationLoadApi, WALResizeStaticData) { - // Check WAL resizing with constant data part - InitNs(); - - const std::string nsName("some"); - auto master = GetSrv(masterId_)->api.reindexer; - // Check new wal size with empty namespace - ASSERT_NO_FATAL_FAILURE(SetWALSize(masterId_, 1000, nsName)); - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(0)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - 
EXPECT_EQ(qr.Count(), 4); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(2)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 2); - } - - // Add data, which do not exceed current wal size - FillData(500); - - BaseApi::QueryResultsType qrLast100_1(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - BaseApi::QueryResultsType qrLast100_2(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - BaseApi::QueryResultsType qrLast100_3(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(0)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 504); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(503)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 1); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(504)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - EXPECT_EQ(qr.Count(), 0); - } - { - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(404)), qrLast100_1); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qrLast100_1.Count(), 100); - } - // Set wal size, which is less than current data count - ASSERT_NO_FATAL_FAILURE(SetWALSize(masterId_, 100, nsName)); - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - 
Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(403)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - } - { - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(404)), qrLast100_2); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qrLast100_2.Count(), 100); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(503)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 1); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(504)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - EXPECT_EQ(qr.Count(), 0); - } - // Set wal size, which is larger than current data count - ASSERT_NO_FATAL_FAILURE(SetWALSize(masterId_, 2000, nsName)); - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(403)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - EXPECT_EQ(qr.Count(), 0); - } - { - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(404)), qrLast100_3); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qrLast100_3.Count(), 100); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(503)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 1); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(504)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << 
err.what(); - EXPECT_EQ(qr.Count(), 0); - } - - auto qrToSet = [](const BaseApi::QueryResultsType& qr) { - std::unordered_set items; - reindexer::WrSerializer ser; - for (auto& item : qr) { - if (item.IsRaw()) { - reindexer::WALRecord rec(item.GetRaw()); - EXPECT_EQ(rec.type, reindexer::WalReplState); - } else { - ser.Reset(); - auto err = item.GetCJSON(ser, false); - EXPECT_TRUE(err.ok()); - items.emplace(ser.Slice()); - } - } - return items; - }; - // Validate, that there are some records, which were not changed after all the wal resizings - auto items_1 = qrToSet(qrLast100_1); - auto items_2 = qrToSet(qrLast100_2); - auto items_3 = qrToSet(qrLast100_3); - ASSERT_EQ(items_1.size(), 99); - ASSERT_TRUE(items_1 == items_2); - ASSERT_TRUE(items_1 == items_3); -} - -TEST_F(ReplicationLoadApi, WALResizeDynamicData) { - // Check WAL resizing in combination with data refilling - InitNs(); - - // Check case, when new wal size is larger, than actual records count, and records count does not exceed wal size after setting - const std::string nsName("some"); - ASSERT_NO_FATAL_FAILURE(SetWALSize(masterId_, 1000, nsName)); - FillData(500); - - // Check case, when new wal size is less, than actual records count - auto master = GetSrv(masterId_)->api.reindexer; - ASSERT_NO_FATAL_FAILURE(SetWALSize(masterId_, 100, nsName)); - FillData(50); - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(453)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(454)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 100); - } - // Check case, when new wal size is larger, than actual records count, and records count exceeds wal size after 
setting - ASSERT_NO_FATAL_FAILURE(SetWALSize(masterId_, 200, nsName)); - FillData(500); - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(853)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - EXPECT_EQ(qr.Count(), 0); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(854)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 200); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(1053)), qr); - EXPECT_TRUE(err.ok()) << err.what(); - EXPECT_EQ(qr.Count(), 1); - } - { - BaseApi::QueryResultsType qr(kResultsWithPayloadTypes | kResultsCJson | kResultsWithItemID | kResultsWithRaw); - Error err = master->Select(Query(nsName).Where("#lsn", CondGt, int64_t(1054)), qr); - EXPECT_EQ(err.code(), errOutdatedWAL) << err.what(); - EXPECT_EQ(qr.Count(), 0); - } -} - -TEST_F(ReplicationLoadApi, ConfigReadingOnStartup) { - // Check if server reads config on startup - const size_t kTestServerID = 0; - - auto srv = GetSrv(kTestServerID); - const auto kReplFilePath = srv->GetReplicationConfigFilePath(); - const auto kAsyncReplFilePath = srv->GetAsyncReplicationConfigFilePath(); - srv.reset(); - StopServer(kTestServerID); - WriteConfigFile(kAsyncReplFilePath, - "role: none\n" - "mode: default\n" - "retry_sync_interval_msec: 3000\n" - "syncs_per_thread: 2\n" - "app_name: node_XXX\n" - "force_sync_on_logic_error: true\n" - "force_sync_on_wrong_data_hash: false\n" - "online_updates_delay_msec: 200\n" - "namespaces: []\n" - "nodes: []"); - WriteConfigFile(kReplFilePath, - "server_id: 4\n" - "cluster_id: 2\n"); - StartServer(kTestServerID); 
- AsyncReplicationConfigTest config("none", {}, true, false, 4, "node_XXX", {}, "default", 200); - CheckReplicationConfigNamespace(kTestServerID, config); -} - -TEST_F(ReplicationLoadApi, DuplicatePKFollowerTest) { - InitNs(); - const unsigned int kItemCount = 5; - auto srv = GetSrv(masterId_); - auto& api = srv->api; - - std::string changedIds; - const unsigned int kChangedCount = 2; - std::unordered_set ids; - for (unsigned i = 0; i < kChangedCount; ++i) { - ids.insert(std::rand() % kItemCount); - } - - bool isFirst = true; - for (const auto id : ids) { - if (!isFirst) { - changedIds += ", "; - } - changedIds += std::to_string(id); - isFirst = false; - } - - std::unordered_map> items; - Error err; - for (size_t i = 0; i < kItemCount; ++i) { - std::string jsonChange; - BaseApi::ItemType item = api.NewItem("some"); - auto json = fmt::sprintf(R"json({"id":%d,"int":%d,"string":"%s","uuid":"%s"})json", i, i + 100, std::to_string(1 + 1000), nilUUID); - err = item.FromJSON(json); - api.Upsert("some", item); - jsonChange = json; - int idNew = i; - if (ids.find(i) != ids.end()) { - jsonChange = fmt::sprintf(R"json({"id":%d,"int":%d,"string":"%s","uuid":"%s"})json", kItemCount * 2 + i, i + 100, - std::to_string(1 + 1000), nilUUID); - idNew = kItemCount * 2 + i; - } - items.emplace(idNew, std::make_pair(json, jsonChange)); - } - - WaitSync("some"); - { - BaseApi::QueryResultsType qr; - err = api.reindexer->Select("Update some set id=id+" + std::to_string(kItemCount * 2) + " where id in(" + changedIds + ")", qr); - ASSERT_TRUE(err.ok()) << err.what(); - WaitSync("some"); - } - - for (size_t k = 0; k < GetServersCount(); k++) { - auto server = GetSrv(k); - { - BaseApi::QueryResultsType qr; - err = server->api.reindexer->Select("select * from some order by id", qr); - ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), items.size()); - for (auto i : qr) { - reindexer::WrSerializer ser; - err = i.GetJSON(ser, false); - gason::JsonParser parser; - auto root = 
parser.Parse(ser.Slice()); - int id = root["id"].As(); - ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(ser.Slice(), items[id].second); - } - } - { - for (auto id : ids) { - BaseApi::QueryResultsType qr; - err = server->api.reindexer->Select("select * from some where id=" + std::to_string(id), qr); - ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), 0); - } - } - { - for (auto id : ids) { - BaseApi::QueryResultsType qr; - err = server->api.reindexer->Select("select * from some where id=" + std::to_string(id + kItemCount * 2), qr); - ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), 1); - } - } - } -} - -TEST_F(ReplicationLoadApi, ConfigSync) { - // Check automatic replication config file and #config namespace sync - using ReplNode = AsyncReplicationConfigTest::Node; - const size_t kTestServerID = 0; - - SCOPED_TRACE("Set replication config via file"); - RestartWithReplicationConfigFiles(kTestServerID, - "role: none\n" - "retry_sync_interval_msec: 3000\n" - "syncs_per_thread: 2\n" - "app_name: node_1\n" - "force_sync_on_logic_error: true\n" - "force_sync_on_wrong_data_hash: false\n" - "namespaces: []\n" - "nodes: []", - "server_id: 3\n" - "cluster_id: 2\n"); - // Validate config file - AsyncReplicationConfigTest config("none", {}, true, false, 3, "node_1", {}, "default"); - CheckReplicationConfigNamespace(kTestServerID, config); - - config = - AsyncReplicationConfigTest("leader", {ReplNode{DSN("cproto://127.0.0.1:53019/db")}, ReplNode{DSN("cproto://127.0.0.1:53020/db")}}, - false, true, 3, "node_1", {"ns1", "ns2"}, "default"); - SCOPED_TRACE("Set replication config(two nodes) via namespace"); - SetServerConfig(kTestServerID, config); - // Validate #config namespace - CheckReplicationConfigFile(kTestServerID, config); - - config = - AsyncReplicationConfigTest("leader", {ReplNode{DSN("cproto://127.0.0.1:45000/db")}}, false, true, 3, "node_xxx", {}, "default"); - SCOPED_TRACE("Set replication config(one node) via namespace"); - 
SetServerConfig(kTestServerID, config); - // Validate replication.conf file - CheckReplicationConfigFile(kTestServerID, config); - - config = AsyncReplicationConfigTest("leader", {ReplNode{DSN("cproto://127.0.0.1:45000/db"), {{"ns1", "ns2"}}}}, false, true, 3, - "node_xxx", {}, "default", 150); - SCOPED_TRACE("Set replication config with custom ns list for existing node via namespace"); - SetServerConfig(kTestServerID, config); - // Validate replication.conf file - CheckReplicationConfigFile(kTestServerID, config); - std::this_thread::sleep_for(std::chrono::seconds(2)); // In case if OS doesn't have nanosecods in stat result - - SCOPED_TRACE("Set replication config via file"); - GetSrv(kTestServerID) - ->WriteAsyncReplicationConfig( - "role: leader\n" - "retry_sync_interval_msec: 3000\n" - "syncs_per_thread: 2\n" - "app_name: node_1\n" - "force_sync_on_logic_error: false\n" - "force_sync_on_wrong_data_hash: true\n" - "online_updates_delay_msec: 50\n" - "namespaces:\n" - " - ns1\n" - " - ns3\n" - "nodes:\n" - " -\n" - " dsn: cproto://127.0.0.1:53001/db1\n" - " namespaces:\n" - " - ns4\n" - " -\n" - " dsn: cproto://127.0.0.1:53002/db2\n"); - config = AsyncReplicationConfigTest( - "leader", {ReplNode{DSN("cproto://127.0.0.1:53001/db1"), {{"ns4"}}}, ReplNode{DSN("cproto://127.0.0.1:53002/db2")}}, false, true, 3, - "node_1", {"ns1", "ns3"}, "default", 50); - // Validate #config namespace - CheckReplicationConfigNamespace(kTestServerID, config, std::chrono::seconds(3)); - - SCOPED_TRACE("Check server id switch"); - GetSrv(kTestServerID) - ->WriteReplicationConfig( - "server_id: 2\n" - "cluster_id: 2\n"); - config.serverId = 2; - // Validate #config namespace - CheckReplicationConfigNamespace(kTestServerID, config, std::chrono::seconds(3)); -} - -#if !defined(REINDEX_WITH_TSAN) -TEST_F(ReplicationLoadApi, DynamicRoleSwitch) { - // Validate replication behavior after node's role switch - InitNs(); - stop = false; - - // Create #config changing threads - std::vector 
configUpdateThreads(GetServersCount()); - for (size_t i = 0; i < configUpdateThreads.size(); ++i) { - configUpdateThreads[i] = std::thread( - [this](size_t id) { - while (!stop) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - size_t cnt = rand() % 5; - SetOptmizationSortWorkers(id, cnt, "*"); - } - }, - i); - } - - // Switch master and await sync in each loop iteration - const size_t kPortionSize = 2000; - size_t expectedLsnCounter = 3; - for (size_t i = 1; i < 8; i++) { - FillData(kPortionSize); - expectedLsnCounter += kPortionSize; - WaitSync("some", reindexer::lsn_t(expectedLsnCounter, masterId_)); - WaitSync("some1", reindexer::lsn_t(expectedLsnCounter, masterId_)); - SwitchMaster(i % kDefaultServerCount, {"some", "some1"}, (i % 2 == 0) ? "default" : "from_sync_leader"); - } - - stop = true; - for (auto& th : configUpdateThreads) { - th.join(); - } -} -#endif - -TEST_F(ReplicationLoadApi, NodeOfflineLastError) { - InitNs(); - - auto leader = GetSrv(0); - StopServer(1); - for (std::size_t i = 0; i < 10; i++) { - auto stats = leader->GetReplicationStats(reindexer::cluster::kAsyncReplStatsType); - if (!stats.nodeStats.empty() && stats.nodeStats[0].lastError.code() == errNetwork) { - break; - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - - auto stats = leader->GetReplicationStats(reindexer::cluster::kAsyncReplStatsType); - ASSERT_EQ(stats.nodeStats.size(), std::size_t(3)); - ASSERT_EQ(stats.nodeStats[0].lastError.code(), errNetwork); - ASSERT_FALSE(stats.nodeStats[0].lastError.what().empty()); -} - -TEST_F(ReplicationLoadApi, LogLevel) { - // Check async replication log level setup - InitNs(); - - std::atomic stop = {false}; - std::thread th([this, &stop] { - // Simple insertion thread for race detection - while (!stop) { - FillData(1); - std::this_thread::sleep_for(std::chrono::milliseconds(5)); - } - }); - - // Replication in tests must be started with 'Trace' log level - auto stats = GetReplicationStats(masterId_); - 
EXPECT_EQ(stats.logLevel, LogTrace); - - // Changing log level - const LogLevel levels[] = {LogInfo, LogTrace, LogWarning, LogError, LogNone, LogInfo}; - for (auto level : levels) { - SetReplicationLogLevel(masterId_, LogLevel(level)); - stats = GetReplicationStats(masterId_); - EXPECT_EQ(stats.logLevel, LogLevel(level)); - } - - // Checking log level after replication restart. It should be reset to 'Trace' - ForceSync(); - stats = GetReplicationStats(masterId_); - EXPECT_EQ(stats.logLevel, LogTrace); - - stop = true; - th.join(); -} diff --git a/cpp_src/gtests/tests/unit/rpcclient_test.cc b/cpp_src/gtests/tests/unit/rpcclient_test.cc index d7436e0d1..cdec962ad 100644 --- a/cpp_src/gtests/tests/unit/rpcclient_test.cc +++ b/cpp_src/gtests/tests/unit/rpcclient_test.cc @@ -1333,8 +1333,20 @@ TEST_F(RPCClientTestApi, QuerySetObjectUpdate) { insertFn(kNsName, kNsSize); + client::CoroQueryResults qr; + { + err = rx.Update(Query(kNsName).Where("id", CondGe, "0").SetObject("nested", Variant(std::string(R"([{"field": 1240}])"))), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.what(), "Error modifying field value: 'Unsupported JSON format. 
Unnamed field detected'"); + } + + { + err = rx.Update(Query(kNsName).Where("id", CondGe, "0").SetObject("nested", Variant(std::string(R"({{"field": 1240}})"))), qr); + ASSERT_FALSE(err.ok()); + EXPECT_EQ(err.what(), "Error modifying field value: 'JSONDecoder: Error parsing json: unquoted key, pos 15'"); + } + { - client::CoroQueryResults qr; // R"(UPDATE TestQuerySetObjectUpdate SET nested = {"field": 1240} where id >= 0)" auto query = Query(kNsName).Where("id", CondGe, "0").SetObject("nested", Variant(std::string(R"({"field": 1240})"))); err = rx.Update(query, qr); diff --git a/cpp_src/gtests/tests/unit/sharding_system_test.cc b/cpp_src/gtests/tests/unit/sharding_system_test.cc index bf2a04796..bc858fabd 100644 --- a/cpp_src/gtests/tests/unit/sharding_system_test.cc +++ b/cpp_src/gtests/tests/unit/sharding_system_test.cc @@ -252,6 +252,7 @@ TEST_F(ShardingSystemApi, AwaitShards) { std::shared_ptr rx = getNode(0)->api.reindexer; std::vector tds; + tds.reserve(kThreads); for (size_t i = 0; i < kThreads; ++i) { tds.emplace_back([&] { std::unique_lock lck(mtx); @@ -302,6 +303,7 @@ TEST_F(ShardingSystemApi, AwaitShardsTimeout) { std::atomic done = false; std::shared_ptr rx = getNode(0)->api.reindexer; std::vector tds; + tds.reserve(kThreads); for (size_t i = 0; i < kThreads; ++i) { tds.emplace_back([&] { auto err = rx->OpenNamespace(kNewNs); diff --git a/cpp_src/gtests/tests/unit/string_function_test.cc b/cpp_src/gtests/tests/unit/string_function_test.cc index 62ad2cea4..307a5b373 100644 --- a/cpp_src/gtests/tests/unit/string_function_test.cc +++ b/cpp_src/gtests/tests/unit/string_function_test.cc @@ -1,4 +1,5 @@ -#if defined(__GNUC__) && (__GNUC__ == 12) && defined(REINDEX_WITH_ASAN) +#if defined(__GNUC__) && ((__GNUC__ == 12) || (__GNUC__ == 13)) && defined(REINDEX_WITH_ASAN) +// regex header is broken in GCC 12.0-13.3 with ASAN #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #include @@ -7,6 +8,7 @@ #include #endif // 
REINDEX_WITH_ASAN +#include "core/ft/numtotext.h" #include "gtest/gtest.h" #include "reindexer_api.h" #include "tools/customlocal.h" @@ -170,3 +172,63 @@ TEST_F(ReindexerApi, LikeWithFullTextIndex) { err = rt.reindexer->Select(Query(default_namespace).Where("name", CondLike, "%" + content[rand() % content.size()]), qr); ASSERT_TRUE(!err.ok()); } + +TEST_F(ReindexerApi, NumToText) { + auto out = [](const std::vector& resNum) { + std::stringstream s; + for (auto& v : resNum) { + s << "[" << v << "] "; + } + s << std::endl; + return s.str(); + }; + std::vector resNum; + bool r = reindexer::NumToText::convert("0", resNum) == std::vector{"ноль"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("00", resNum) == std::vector{"ноль", "ноль"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("000010", resNum) == std::vector{"ноль", "ноль", "ноль", "ноль", "десять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("01000000", resNum) == std::vector{"ноль", "один", "миллион"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("121", resNum) == std::vector{"сто", "двадцать", "один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1", resNum) == std::vector{"один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("9", resNum) == std::vector{"девять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("10", resNum) == std::vector{"десять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("13", resNum) == std::vector{"тринадцать"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("30", resNum) == std::vector{"тридцать"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("48", resNum) == std::vector{"сорок", "восемь"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("100", resNum) == std::vector{"сто"}; + ASSERT_TRUE(r) << out(resNum); + r = 
reindexer::NumToText::convert("500", resNum) == std::vector{"пятьсот"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("999", resNum) == std::vector{"девятьсот", "девяносто", "девять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1000", resNum) == std::vector{"одна", "тысяча"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1001", resNum) == std::vector{"одна", "тысяча", "один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("5111", resNum) == std::vector{"пять", "тысяч", "сто", "одиннадцать"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("777101", resNum) == + std::vector{"семьсот", "семьдесят", "семь", "тысяч", "сто", "один"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1000000000", resNum) == std::vector{"один", "миллиард"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1005000000", resNum) == std::vector{"один", "миллиард", "пять", "миллионов"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("50000000055", resNum) == + std::vector{"пятьдесят", "миллиардов", "пятьдесят", "пять"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("100000000000000000000000000", resNum) == std::vector{"сто", "септиллионов"}; + ASSERT_TRUE(r) << out(resNum); + r = reindexer::NumToText::convert("1000000000000000000000000000", resNum) == std::vector{}; + ASSERT_TRUE(r) << out(resNum); +} diff --git a/cpp_src/gtests/tests/unit/synccororeindexer_test.cc b/cpp_src/gtests/tests/unit/synccororeindexer_test.cc index b3630a895..d3aa62dc5 100644 --- a/cpp_src/gtests/tests/unit/synccororeindexer_test.cc +++ b/cpp_src/gtests/tests/unit/synccororeindexer_test.cc @@ -370,6 +370,7 @@ TEST(SyncCoroRx, StopWhileWriting) { ASSERT_TRUE(err.ok()) << err.what(); std::vector writingThreads; + writingThreads.reserve(kWritingThreadsCount); for (size_t i = 0; i < kWritingThreadsCount; ++i) { 
writingThreads.emplace_back([&client, &state, &kNsName, kWritingThreadSleep]() noexcept { int i = 0; diff --git a/cpp_src/gtests/tests/unit/tolal_lru_cache.cc b/cpp_src/gtests/tests/unit/tolal_lru_cache.cc index 80832acba..573956d4e 100644 --- a/cpp_src/gtests/tests/unit/tolal_lru_cache.cc +++ b/cpp_src/gtests/tests/unit/tolal_lru_cache.cc @@ -82,7 +82,7 @@ TEST(LruCache, SimpleTest) { QueryCountCache cache(reindexer::kDefaultCacheSizeLimit, reindexer::kDefaultHitCountToCache); PRINTF("checking query cache...\n"); - for (auto i = 0; i < kIterCount; i++) { + for (i = 0; i < kIterCount; i++) { auto idx = rand() % qs.size(); auto& qce = qs.at(idx); QueryCacheKey ckey{qce.q, kCountCachedKeyMode, qce.JoinedSelectorsPtr()}; @@ -91,7 +91,7 @@ TEST(LruCache, SimpleTest) { if (cached.valid) { ASSERT_TRUE(exist) << "query missing in query cache"; - ASSERT_EQ(cached.val.total_count, qce.expectedTotal) << "cached data are not valid"; + ASSERT_EQ(cached.val.totalCount, qce.expectedTotal) << "cached data are not valid"; } else { size_t total = static_cast(rand() % 10000); cache.Put(ckey, QueryCountCacheVal{total}); diff --git a/cpp_src/net/cproto/cproto.h b/cpp_src/net/cproto/cproto.h index 5eb7139c0..af8bc525d 100644 --- a/cpp_src/net/cproto/cproto.h +++ b/cpp_src/net/cproto/cproto.h @@ -81,7 +81,7 @@ enum CmdCode : uint16_t { std::string_view CmdName(uint16_t cmd) noexcept; // Maximum number of active queries per client -const uint32_t kMaxConcurentQueries = 256; +const uint32_t kMaxConcurrentQueries = 256; // Maximum number of active snapshots per client const uint32_t kMaxConcurentSnapshots = 8; diff --git a/cpp_src/net/cproto/dispatcher.h b/cpp_src/net/cproto/dispatcher.h index 492af9559..ce9c5ecad 100644 --- a/cpp_src/net/cproto/dispatcher.h +++ b/cpp_src/net/cproto/dispatcher.h @@ -1,6 +1,7 @@ #pragma once #include "args.h" +#include "core/keyvalue/p_string.h" #include "cproto.h" #include "estl/chunk.h" #include "net/connectinstatscollector.h" @@ -151,14 +152,34 @@ class 
Dispatcher { template ::value, int> = 0> static T get_arg(const Args& args, size_t index, const Context& ctx) { - if (index >= args.size()) { - throw Error(errParams, "Invalid args of %s call; argument %d is not submitted", CmdName(ctx.call->cmd), static_cast(index)); + if rx_unlikely (index >= args.size()) { + throw Error(errParams, "Invalid args of %s call; argument %d is not submitted", CmdName(ctx.call->cmd), + static_cast(index)); + } + if rx_unlikely (!args[index].Type().IsSame(KeyValueType::From())) { + throw Error(errLogic, "Incorrect variant type of %s call, argument index %d, type '%s', expected type '%s'", + CmdName(ctx.call->cmd), static_cast(index), args[index].Type().Name(), KeyValueType::From().Name()); } return T(args[index]); } template ::value, int> = 0> - static T get_arg(const Args& args, size_t index, const Context&) { - return index < args.size() ? T(typename T::value_type(args[index])) : T(); + static T get_arg(const Args& args, size_t index, const Context& ctx) { + if rx_unlikely (index >= args.size()) { +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 9 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + return T(); +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 9 +#pragma GCC diagnostic pop +#endif + } + if rx_unlikely (!args[index].Type().IsSame(KeyValueType::From())) { + throw Error(errLogic, "Incorrect variant type of %s call, argument index %d, type '%s', optional expected type '%s'", + CmdName(ctx.call->cmd), static_cast(index), args[index].Type().Name(), + KeyValueType::From().Name()); + } + return T(typename T::value_type(args[index])); } template diff --git a/cpp_src/net/cproto/serverconnection.cc b/cpp_src/net/cproto/serverconnection.cc index 4fc2360d3..6cb580ce8 100644 --- a/cpp_src/net/cproto/serverconnection.cc +++ b/cpp_src/net/cproto/serverconnection.cc @@ -259,21 +259,33 @@ ServerConnection::BaseConnT::ReadResT ServerConnection::onRead() { Args ctxArgs; 
ctxArgs.Unpack(ser); if (ctxArgs.size() > 0) { + if (!ctxArgs[0].Type().IsSame(KeyValueType::From())) { + throw Error(errLogic, "Incorrect variant type for 'execTimeout' type='%s'", ctxArgs[0].Type().Name()); + } ctx.call->execTimeout = milliseconds(int64_t(ctxArgs[0])); } else { ctx.call->execTimeout = milliseconds(0); } if (ctxArgs.size() > 1) { + if (!ctxArgs[1].Type().IsSame(KeyValueType::From())) { + throw Error(errLogic, "Incorrect variant type for 'lsn' type='%s'", ctxArgs[1].Type().Name()); + } ctx.call->lsn = lsn_t(int64_t(ctxArgs[1])); } else { ctx.call->lsn = lsn_t(); } if (ctxArgs.size() > 2) { + if (!ctxArgs[2].Type().IsSame(KeyValueType::From())) { + throw Error(errLogic, "Incorrect variant type for 'emmiterServerId' type='%s'", ctxArgs[2].Type().Name()); + } ctx.call->emmiterServerId = int64_t(ctxArgs[2]); } else { ctx.call->emmiterServerId = -1; } if (ctxArgs.size() > 3) { + if (!ctxArgs[3].Type().IsSame(KeyValueType::From())) { + throw Error(errLogic, "Incorrect variant type for 'shardIdValue' type='%s'", ctxArgs[3].Type().Name()); + } const int64_t shardIdValue = int64_t(ctxArgs[3]); if (shardIdValue < 0) { if (shardIdValue < std::numeric_limits::min()) { diff --git a/cpp_src/net/listener.cc b/cpp_src/net/listener.cc index 639bcfb81..933569fae 100644 --- a/cpp_src/net/listener.cc +++ b/cpp_src/net/listener.cc @@ -34,7 +34,7 @@ Listener::Listener(ev::dynamic_loop& loop, std::shared_ptr shared) template Listener::Listener(ev::dynamic_loop& loop, ConnectionFactory&& connFactory, openssl::SslCtxPtr sslCtx, int maxListeners) - : Listener(loop, std::make_shared(std::move(connFactory), (maxListeners ? maxListeners : hardware_concurrency()) + 1, + : Listener(loop, std::make_shared(std::move(connFactory), (maxListeners ? 
maxListeners : (double(hardware_concurrency()) * 1.2)) + 1, std::move(sslCtx))) {} template diff --git a/cpp_src/server/contrib/server.md b/cpp_src/server/contrib/server.md index 1af15304d..3dc2ae7ec 100644 --- a/cpp_src/server/contrib/server.md +++ b/cpp_src/server/contrib/server.md @@ -92,6 +92,7 @@ * [JoinCacheMemStats](#joincachememstats) * [JoinedDef](#joineddef) * [JsonObjectDef](#jsonobjectdef) + * [LRUCachePerfStats](#lrucacheperfstats) * [LongQueriesLogging](#longquerieslogging) * [MetaByKeyResponse](#metabykeyresponse) * [MetaInfo](#metainfo) @@ -139,7 +140,7 @@ Reindexer is compact, fast and it does not have heavy dependencies. ### Version information -*Version* : 4.19.0 +*Version* : 4.20.0 ### License information @@ -2696,6 +2697,18 @@ Join cache stats. Stores results of selects to right table by ON condition +### LRUCachePerfStats +Performance statistics for specific LRU-cache instance + + +|Name|Description|Schema| +|---|---|---| +|**cache_hit_rate**
*optional*|Cache hit rate (hits / total_queries)
**Minimum value** : `0`
**Maximum value** : `1`|number| +|**is_active**
*optional*|Determines if cache is currently in use. Usually it has 'false' value for uncommited indexes|boolean| +|**total_queries**
*optional*|Queries total count
**Minimum value** : `0`|integer| + + + ### LongQueriesLogging Parameters for logging long queries and transactions @@ -2801,8 +2814,10 @@ List of meta info of the specified namespace |Name|Description|Schema| |---|---|---| -|**indexes**
*optional*|Memory consumption of each namespace index|< [indexes](#namespaceperfstats-indexes) > array| +|**indexes**
*optional*|Performance statistics for each namespace index|< [indexes](#namespaceperfstats-indexes) > array| +|**join_cache**
*optional*|Joins cache statistics|[LRUCachePerfStats](#lrucacheperfstats)| |**name**
*optional*|Name of namespace|string| +|**query_count_cache**
*optional*|Queries cache statistics (for the queries with COUNT_CACHED() aggregation)|[LRUCachePerfStats](#lrucacheperfstats)| |**selects**
*optional*||[SelectPerfStats](#selectperfstats)| |**transactions**
*optional*||[TransactionsPerfStats](#transactionsperfstats)| |**updates**
*optional*||[UpdatePerfStats](#updateperfstats)| @@ -2812,6 +2827,7 @@ List of meta info of the specified namespace |Name|Description|Schema| |---|---|---| +|**cache**
*optional*|If index does not use IDs cache at all, this struct won't be present in response|[LRUCachePerfStats](#lrucacheperfstats)| |**name**
*optional*|Name of index|string| |**selects**
*optional*||[SelectPerfStats](#selectperfstats)| |**updates**
*optional*||[UpdatePerfStats](#updateperfstats)| @@ -2852,6 +2868,7 @@ List of meta info of the specified namespace |**optimization_sort_workers**
*optional*|Maximum number of background threads of sort indexes optimization. 0 - disable sort optimizations|integer| |**optimization_timeout_ms**
*optional*|Timeout before background indexes optimization start after last update. 0 - disable optimizations|integer| |**start_copy_policy_tx_size**
*optional*|Enable namespace copying for transaction with steps count greater than this value (if copy_politics_multiplier also allows this)|integer| +|**strict_mode**
*optional*|Strict mode for queries. Adds additional check for fields('names')/indexes('indexes') existence in sorting and filtering conditions|enum (none, names, indexes)| |**sync_storage_flush_limit**
*optional*|Enables synchronous storage flush inside write-calls, if async updates count is more than sync_storage_flush_limit. 0 - disables synchronous storage flush, in this case storage will be flushed in background thread only
**Minimum value** : `0`|integer| |**tx_size_to_always_copy**
*optional*|Force namespace copying for transaction with steps count greater than this value|integer| |**unload_idle_threshold**
*optional*|Unload namespace data from RAM after this idle timeout in seconds. If 0, then data should not be unloaded|integer| @@ -2999,7 +3016,7 @@ Performance statistics per each query |Name|Description|Schema| |---|---|---| |**cluster_id**
*optional*|Cluser ID - must be same for client and for master|integer| -|**server_id**
*optional*|Node identifier. Should be unique for each node in the replicated cluster (non-unique IDs are also allowed, but may lead to the inconsistency in some cases
**Maximum value** : `999`|integer| +|**server_id**
*optional*|Node identifier. Should be unique for each node in the replicated cluster (non-unique IDs are also allowed, but may lead to the inconsistency in some cases
**Minimum value** : `0`
**Maximum value** : `999`|integer| diff --git a/cpp_src/server/contrib/server.yml b/cpp_src/server/contrib/server.yml index 5bcdc5749..7828e7e0a 100644 --- a/cpp_src/server/contrib/server.yml +++ b/cpp_src/server/contrib/server.yml @@ -4,46 +4,46 @@ info: **Reindexer** is an embeddable, in-memory, document-oriented database with a high-level Query builder interface. Reindexer's goal is to provide fast search with complex queries. Reindexer is compact, fast and it does not have heavy dependencies. - version: "4.19.0" + version: "4.20.0" title: "Reindexer REST API" license: name: "Apache 2.0" url: "http://www.apache.org/licenses/LICENSE-2.0.html" basePath: "/api/v1" tags: -- name: "databases" - description: "Databases management" -- name: "namespaces" - description: "Namespaces management" -- name: "items" - description: "Documents management" -- name: "indexes" - description: "Indexes management" -- name: "queries" - description: "Queries to reindexer (dsl/sql)" -- name: "system" - description: "System methods" + - name: "databases" + description: "Databases management" + - name: "namespaces" + description: "Namespaces management" + - name: "items" + description: "Documents management" + - name: "indexes" + description: "Indexes management" + - name: "queries" + description: "Queries to reindexer (dsl/sql)" + - name: "system" + description: "System methods" schemes: -- "http" + - "http" produces: -- "application/json" + - "application/json" paths: /db: post: tags: - - "databases" + - "databases" summary: "Create new database" description: | This operation will create new database. If database is already exists, then error will be returned. 
operationId: "createDatabase" parameters: - - in: body - name: "body" - description: "Database definition" - required: true - schema: - $ref: "#/definitions/Database" + - in: body + name: "body" + description: "Database definition" + required: true + schema: + $ref: "#/definitions/Database" responses: 200: $ref: "#/responses/OK" @@ -60,19 +60,19 @@ paths: get: tags: - - "databases" + - "databases" summary: "List available databases" description: | This operation will output list of all available databases operationId: "describeDatabases" parameters: - - name: "sort_order" - in: query - type: string - description: "Sort Order" - enum: - - "asc" - - "desc" + - name: "sort_order" + in: query + type: string + description: "Sort Order" + enum: + - "asc" + - "desc" responses: 200: description: "successful operation" @@ -92,7 +92,7 @@ paths: /db/{database}: delete: tags: - - "databases" + - "databases" summary: "Drop database" description: | This operation will remove complete database from memory and disk. @@ -100,11 +100,11 @@ paths: Can not be undone. USE WITH CAUTION. operationId: "dropDatabase" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: $ref: "#/responses/OK" @@ -122,24 +122,24 @@ paths: /db/{database}/namespaces: post: tags: - - "namespaces" + - "namespaces" summary: "Create namespace" description: | This operation will create new namespace in specified database. If namespace is already exists, then operation do not nothing. 
operationId: "openNamespace" parameters: - - in: body - name: "body" - description: "Namespace definition" - required: true - schema: - $ref: "#/definitions/Namespace" - - name: "database" - in: path - type: string - description: "Database name" - required: true + - in: body + name: "body" + description: "Namespace definition" + required: true + schema: + $ref: "#/definitions/Namespace" + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: $ref: "#/responses/OK" @@ -155,25 +155,25 @@ paths: $ref: "#/responses/UnexpectedError" get: tags: - - "namespaces" + - "namespaces" summary: "List available namespaces" description: | This operation will list all available namespaces in specified database. If database is not exists, then error will be returned. operationId: "describeNamespaces" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "sort_order" - in: query - type: string - description: "Sort Order" - enum: - - "asc" - - "desc" + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "sort_order" + in: query + type: string + description: "Sort Order" + enum: + - "asc" + - "desc" responses: 200: description: "successful operation" @@ -193,22 +193,22 @@ paths: /db/{database}/namespaces/{name}: get: tags: - - "namespaces" + - "namespaces" summary: "Get namespace description" description: | This operation will return specified namespace description, including options of namespace, and available indexes operationId: "describeCurrNamespace" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + 
required: true responses: 200: description: "successful operation" @@ -226,7 +226,7 @@ paths: $ref: "#/responses/UnexpectedError" delete: tags: - - "namespaces" + - "namespaces" summary: "Drop namespace" description: | This operation will delete completely namespace from memory and disk. @@ -234,16 +234,16 @@ paths: Can not be undone. USE WITH CAUTION. operationId: "dropNamespace" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: $ref: "#/responses/OK" @@ -261,22 +261,22 @@ paths: /db/{database}/namespaces/{name}/truncate: delete: tags: - - "namespaces" + - "namespaces" summary: "Truncate namespace" description: | This operation will delete all items from namespace. operationId: "truncateNamespace" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: $ref: "#/responses/OK" @@ -294,27 +294,27 @@ paths: /db/{database}/namespaces/{name}/rename/{newname}: get: tags: - - "namespaces" + - "namespaces" summary: "Rename namespace" description: | This operation will rename namespace. 
operationId: "renameNamespace" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "newname" - in: path - type: string - description: "Namespace new name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "newname" + in: path + type: string + description: "Namespace new name" + required: true responses: 200: @@ -334,47 +334,47 @@ paths: /db/{database}/namespaces/{name}/metalist: get: tags: - - "namespaces" + - "namespaces" summary: "Get list of namespace's meta info" description: | This operation will return list of keys of all meta of specified namespace operationId: "getNamespaceMetalist" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "sort_order" - in: query - required: false - description: "Sort Order" - type: string - enum: - - "asc" - - "desc" - - name: "with_values" - in: query - required: false - description: "Include values in response" - type: boolean - default: false - - name: "offset" - in: query - required: false - type: integer - default: 0 - - name: "limit" - in: query - required: false - type: integer - description: "If 0 - no limit" - default: 0 + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "sort_order" + in: query + required: false + description: "Sort Order" + type: string + enum: + - "asc" + - "desc" + - name: "with_values" + in: query + required: false + description: "Include values in response" + type: boolean + default: 
false + - name: "offset" + in: query + required: false + type: integer + default: 0 + - name: "limit" + in: query + required: false + type: integer + description: "If 0 - no limit" + default: 0 responses: 200: description: "successful operation" @@ -394,27 +394,27 @@ paths: /db/{database}/namespaces/{name}/metabykey/{key}: get: tags: - - "namespaces" + - "namespaces" summary: "Get namespace's meta info by key" description: | This operation will return value of namespace's meta with specified key operationId: "getNamespaceMetaByKey" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "key" - in: path - required: true - description: "Meta key" - type: string + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "key" + in: path + required: true + description: "Meta key" + type: string responses: 200: description: "Successful operation" @@ -432,27 +432,27 @@ paths: $ref: "#/responses/UnexpectedError" delete: tags: - - "namespaces" + - "namespaces" summary: "Remove namespace's meta info for key" description: | This operation will remove meta with specified key from namespace operationId: "deleteNamespaceMetaByKey" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "key" - in: path - required: true - description: "Meta key" - type: string + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "key" + in: path + required: true + description: "Meta key" + type: string responses: 200: 
$ref: "#/responses/OK" @@ -470,28 +470,28 @@ paths: /db/{database}/namespaces/{name}/metabykey: put: tags: - - "namespaces" + - "namespaces" summary: "Put namespace's meta info with specified key and value" description: | This operation will set namespace's meta with specified key and value operationId: "putNamespaceMetaByKey" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "meta_info" - in: body - required: true - description: "Meta info" - schema: - $ref: "#/definitions/MetaInfo" + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "meta_info" + in: body + required: true + description: "Meta info" + schema: + $ref: "#/definitions/MetaInfo" responses: 200: description: "successful operation" @@ -511,7 +511,7 @@ paths: /db/{database}/namespaces/{name}/items: delete: tags: - - "items" + - "items" summary: "Delete documents from namespace" operationId: "deleteItems" description: | @@ -523,29 +523,29 @@ paths: ... 
``` parameters: - - in: body - name: "body" - schema: - type: object - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "precepts" - in: query - type: array - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: body + name: "body" + schema: + type: object + required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path type: string + description: "Namespace name" + required: true + - name: "precepts" + in: query + type: array + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: string responses: 200: description: "successful operation" @@ -563,7 +563,7 @@ paths: $ref: "#/responses/UnexpectedError" post: tags: - - "items" + - "items" summary: "Insert documents to namespace" operationId: "postItems" description: | @@ -575,38 +575,38 @@ paths: ... 
``` parameters: - - in: body - name: "body" - schema: - type: object - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "precepts" - in: query - type: array - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: body + name: "body" + schema: + type: object + required: true + - name: "database" + in: path type: string - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "precepts" + in: query + type: array + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: string + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: description: "successful operation" @@ -624,7 +624,7 @@ paths: $ref: "#/responses/UnexpectedError" put: tags: - - "items" + - "items" summary: "Update documents in namespace" operationId: "putItems" description: | @@ -636,38 +636,38 @@ paths: ... 
``` parameters: - - in: body - name: "body" - schema: - type: object - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "precepts" - in: query - type: array - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: body + name: "body" + schema: + type: object + required: true + - name: "database" + in: path type: string - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "precepts" + in: query + type: array + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: string + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: description: "successful operation" @@ -686,7 +686,7 @@ paths: patch: tags: - - "items" + - "items" summary: "Upsert documents in namespace" operationId: "patchItems" description: | @@ -698,38 +698,38 @@ paths: ... 
``` parameters: - - in: body - name: "body" - schema: - type: object - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "precepts" - in: query - type: array - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: body + name: "body" + schema: + type: object + required: true + - name: "database" + in: path type: string - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "precepts" + in: query + type: array + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: string + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: description: "successful operation" @@ -747,75 +747,75 @@ paths: $ref: "#/responses/UnexpectedError" get: tags: - - "items" + - "items" summary: "Get documents from namespace" operationId: "getItems" description: | This operation will select documents from namespace with specified filters, and sort them by specified sort order. Paging with limit and offset are supported. 
parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "limit" - in: query - type: integer - description: "Maximum count of returned items" - - name: "offset" - in: query - type: integer - description: "Offset of first returned item" - - name: "sort_field" - in: query - type: string - description: "Sort Field" - - name: "sort_order" - in: query - type: string - description: "Sort Order" - enum: - - "asc" - - "desc" - - name: "filter" - in: query - type: string - description: "Filter with SQL syntax, e.g: field1 = 'v1' AND field2 > 'v2'" - - name: "fields" - in: query - type: string - description: "Comma-separated list of returned fields" - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf - - csv-file - - name: "sharding" - in: query - type: string - description: "if off then get items from current node only" - required: false - enum: - - on - - off - - name: "with_shard_ids" - in: query - type: string - description: "if sharding is enabled, then add the #shard_id field to the item" - required: false - enum: - - on - - off + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "limit" + in: query + type: integer + description: "Maximum count of returned items" + - name: "offset" + in: query + type: integer + description: "Offset of first returned item" + - name: "sort_field" + in: query + type: string + description: "Sort Field" + - name: "sort_order" + in: query + type: string + description: "Sort Order" + enum: + - "asc" + - "desc" + - name: "filter" + in: query + type: string + description: "Filter with SQL syntax, e.g: field1 = 'v1' AND field2 > 'v2'" + - name: "fields" + in: 
query + type: string + description: "Comma-separated list of returned fields" + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf + - csv-file + - name: "sharding" + in: query + type: string + description: "if off then get items from current node only" + required: false + enum: + - on + - off + - name: "with_shard_ids" + in: query + type: string + description: "if sharding is enabled, then add the #shard_id field to the item" + required: false + enum: + - on + - off responses: 200: description: "successful operation" @@ -835,22 +835,22 @@ paths: /db/{database}/namespaces/{name}/indexes: get: tags: - - "indexes" + - "indexes" summary: "List available indexes" description: | This operation will return list of available indexes, from specified database and namespace. operationId: "describeIndexes" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: description: "successful operation" @@ -868,29 +868,29 @@ paths: $ref: "#/responses/UnexpectedError" post: tags: - - "indexes" + - "indexes" summary: "Add new index to namespace" operationId: "postIndex" description: | This operation will create new index. If index is already exists with the different parameters, then error will be returned. Operation synchronous, so it can take long time, if namespace contains bunch of documents. 
parameters: - - in: body - name: "body" - description: "Index definition" - schema: - $ref: "#/definitions/Index" - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - in: body + name: "body" + description: "Index definition" + schema: + $ref: "#/definitions/Index" + required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: $ref: "#/responses/OK" @@ -906,29 +906,29 @@ paths: $ref: "#/responses/UnexpectedError" put: tags: - - "indexes" + - "indexes" summary: "Update index in namespace" operationId: "putIndex" description: | This operation will update index parameters. E.g. type of field or type of index. Operation synchronous, so it can take long time, if namespace contains bunch of documents parameters: - - in: body - name: "body" - description: "Index definition" - schema: - $ref: "#/definitions/Index" - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - in: body + name: "body" + description: "Index definition" + schema: + $ref: "#/definitions/Index" + required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: $ref: "#/responses/OK" @@ -946,28 +946,28 @@ paths: /db/{database}/namespaces/{name}/indexes/{indexname}: delete: tags: - - "indexes" + - "indexes" summary: "Drop index from namespace" operationId: "deleteIndex" description: | This operation will remove index from namespace. No data will be erased. 
Operation synchronous, so it can take long time, if namespace contains bunch of documents. parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true - - name: "indexname" - in: path - type: string - description: "Index name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true + - name: "indexname" + in: path + type: string + description: "Index name" + required: true responses: 200: $ref: "#/responses/OK" @@ -985,28 +985,28 @@ paths: /db/{database}/namespaces/{name}/schema: put: tags: - - "schema" + - "schema" summary: "Set namespace schema" operationId: "putSchema" description: | This operation will set namespace schema (information about available fields and field types) parameters: - - in: body - name: "body" - description: This operation will put new schema for specified database and namespace - schema: - $ref: "#/definitions/SchemaDef" - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - in: body + name: "body" + description: This operation will put new schema for specified database and namespace + schema: + $ref: "#/definitions/SchemaDef" + required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: $ref: "#/responses/OK" @@ -1022,21 +1022,21 @@ paths: $ref: "#/responses/UnexpectedError" get: tags: - - "schema" + - "schema" summary: "Get namespace schema" description: This operation will return current schema from specified database and namespace operationId: 
"getSchema" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "name" - in: path - type: string - description: "Namespace name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "name" + in: path + type: string + description: "Namespace name" + required: true responses: 200: description: "successful operation" @@ -1056,27 +1056,27 @@ paths: /db/{database}/protobuf_schema: get: produces: - - "text/plain" + - "text/plain" tags: - - "schema" + - "schema" summary: "Get protobuf communication parameters schema" operationId: "getNsParamsSchema" description: | - This operation allows to get client/server communication parameters as google protobuf schema (content of .proto file) + This operation allows to get client/server communication parameters as google protobuf schema (content of .proto file) parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "ns" - in: "query" - type: "array" - collectionFormat: "multi" - description: "Namespace name" - items: - type: "string" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "ns" + in: "query" + type: "array" + collectionFormat: "multi" + description: "Namespace name" + items: + type: "string" + required: true responses: 200: description: "successful operation" @@ -1094,7 +1094,7 @@ paths: /db/{database}/query: get: tags: - - "queries" + - "queries" summary: "Query documents from namespace" operationId: "getQuery" description: | @@ -1102,52 +1102,52 @@ paths: Two level paging is supported. At first, applied normal SQL `LIMIT` and `OFFSET`, then `limit` and `offset` from http request. 
parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "q" - in: query - type: string - description: "SQL query" - required: true - - name: "limit" - in: query - type: integer - description: "Maximum count of returned items" - - name: "offset" - in: query - type: integer - description: "Offset of first returned item" - - name: with_columns - in: query - type: boolean - description: "Return columns names and widths for table format output" - required: false - - name: width - in: query - type: integer - description: "Total width in rows of view for table format output" - required: false - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf - - csv-file - - name: "sharding" - in: query - type: string - description: "if off then execute SQL query on current node" - required: false - enum: - - on - - off + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "q" + in: query + type: string + description: "SQL query" + required: true + - name: "limit" + in: query + type: integer + description: "Maximum count of returned items" + - name: "offset" + in: query + type: integer + description: "Offset of first returned item" + - name: with_columns + in: query + type: boolean + description: "Return columns names and widths for table format output" + required: false + - name: width + in: query + type: integer + description: "Total width in rows of view for table format output" + required: false + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf + - csv-file + - name: "sharding" + in: query + type: string + description: "if off then execute SQL query on current node" + required: false + enum: + - on + - off responses: 200: @@ -1166,43 +1166,43 @@ paths: $ref: "#/responses/UnexpectedError" 
post: tags: - - "queries" + - "queries" summary: "Query documents from namespace" operationId: "postQuery" description: | This operation queries documents from namespace by DSL query. parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - in: body - name: "body" - description: "DSL query" - required: true - schema: - $ref: "#/definitions/Query" - - name: with_columns - in: query - type: boolean - description: "Return columns names and widths for table format output" - required: false - - name: width - in: query - type: integer - description: "Total width in rows of view for table format output" - required: false - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf - - csv-file + - name: "database" + in: path + type: string + description: "Database name" + required: true + - in: body + name: "body" + description: "DSL query" + required: true + schema: + $ref: "#/definitions/Query" + - name: with_columns + in: query + type: boolean + description: "Return columns names and widths for table format output" + required: false + - name: width + in: query + type: integer + description: "Total width in rows of view for table format output" + required: false + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf + - csv-file responses: 200: description: "successful operation" @@ -1220,23 +1220,23 @@ paths: $ref: "#/responses/UnexpectedError" put: tags: - - "queries" + - "queries" summary: "Update documents in namespace" operationId: "updateQuery" description: | This operation updates documents in namespace by DSL query. 
parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - in: body - name: "body" - description: "DSL query" - required: true - schema: - $ref: "#/definitions/Query" + - name: "database" + in: path + type: string + description: "Database name" + required: true + - in: body + name: "body" + description: "DSL query" + required: true + schema: + $ref: "#/definitions/Query" responses: 200: $ref: "#/responses/OK" @@ -1252,23 +1252,23 @@ paths: $ref: "#/responses/UnexpectedError" delete: tags: - - "queries" + - "queries" summary: "Delete documents from namespace" operationId: "deleteQuery" description: | This operation removes documents from namespace by DSL query. parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - in: body - name: "body" - description: "DSL query" - required: true - schema: - $ref: "#/definitions/Query" + - name: "database" + in: path + type: string + description: "Database name" + required: true + - in: body + name: "body" + description: "DSL query" + required: true + schema: + $ref: "#/definitions/Query" responses: 200: $ref: "#/responses/OK" @@ -1286,29 +1286,29 @@ paths: /db/{database}/namespaces/{name}/transactions/begin: post: tags: - - "transactions" + - "transactions" summary: "Begin transaction to namespace" operationId: "beginTx" parameters: - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "name" - in: "path" - type: "string" - description: "Namespace name" - required: true - - name: "format" - in: "query" - type: "string" - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + - name: "database" + in: "path" + type: "string" + description: "Database name" + required: true + - name: "name" + in: "path" + type: "string" + description: "Namespace name" + required: true + - name: "format" + in: "query" + type: "string" + description: 
"encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: description: "successful operation" @@ -1328,20 +1328,20 @@ paths: /db/{database}/transactions/{tx_id}/commit: post: tags: - - "transactions" + - "transactions" summary: "Commit transaction" operationId: "commitTx" parameters: - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true + - name: "database" + in: "path" + type: "string" + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true responses: 200: $ref: "#/responses/OK" @@ -1359,20 +1359,20 @@ paths: /db/{database}/transactions/{tx_id}/rollback: post: tags: - - "transactions" + - "transactions" summary: "Rollback transaction" operationId: "rollbackTx" parameters: - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true + - name: "database" + in: "path" + type: "string" + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true responses: 200: $ref: "#/responses/OK" @@ -1390,7 +1390,7 @@ paths: /db/{database}/transactions/{tx_id}/items: delete: tags: - - "transactions" + - "transactions" summary: "Delete documents from namespace via transaction" operationId: "deleteItemsTx" description: | @@ -1403,29 +1403,29 @@ paths: ... 
``` parameters: - - in: "body" - name: "body" - schema: - type: "object" - required: true - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "precepts" - in: "query" - type: "array" - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: "body" + name: "body" + schema: + type: "object" + required: true + - name: "database" + in: "path" type: "string" - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true + description: "Database name" + required: true + - name: "precepts" + in: "query" + type: "array" + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: "string" + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true responses: 200: $ref: "#/responses/OK" @@ -1441,7 +1441,7 @@ paths: $ref: "#/responses/UnexpectedError" post: tags: - - "transactions" + - "transactions" summary: "Insert documents to namespace via transaction" operationId: "postItemsTx" description: | @@ -1454,38 +1454,38 @@ paths: ... 
``` parameters: - - in: "body" - name: "body" - schema: - type: "object" - required: true - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true - - name: "precepts" - in: "query" - type: "array" - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: "body" + name: "body" + schema: + type: "object" + required: true + - name: "database" + in: "path" type: "string" - - name: "format" - in: "query" - type: "string" - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true + - name: "precepts" + in: "query" + type: "array" + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: "string" + - name: "format" + in: "query" + type: "string" + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: $ref: "#/responses/OK" @@ -1501,7 +1501,7 @@ paths: $ref: "#/responses/UnexpectedError" put: tags: - - "transactions" + - "transactions" summary: "Update documents in namespace via transaction" operationId: "putItemsTx" description: | @@ -1514,38 +1514,38 @@ paths: ... 
``` parameters: - - in: "body" - name: "body" - schema: - type: "object" - required: true - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true - - name: "precepts" - in: "query" - type: "array" - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: "body" + name: "body" + schema: + type: "object" + required: true + - name: "database" + in: "path" type: "string" - - name: "format" - in: "query" - type: "string" - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true + - name: "precepts" + in: "query" + type: "array" + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: "string" + - name: "format" + in: "query" + type: "string" + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: $ref: "#/responses/OK" @@ -1561,7 +1561,7 @@ paths: $ref: "#/responses/UnexpectedError" patch: tags: - - "transactions" + - "transactions" summary: "Upsert documents in namespace via transaction" operationId: "patchItemsTx" description: | @@ -1574,38 +1574,38 @@ paths: ... 
``` parameters: - - in: "body" - name: "body" - schema: - type: "object" - required: true - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true - - name: "precepts" - in: "query" - type: "array" - collectionFormat: "multi" - description: "Precepts to be done" - required: false - items: + - in: "body" + name: "body" + schema: + type: "object" + required: true + - name: "database" + in: "path" type: "string" - - name: "format" - in: "query" - type: "string" - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true + - name: "precepts" + in: "query" + type: "array" + collectionFormat: "multi" + description: "Precepts to be done" + required: false + items: + type: "string" + - name: "format" + in: "query" + type: "string" + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: $ref: "#/responses/OK" @@ -1623,42 +1623,42 @@ paths: /db/{database}/transactions/{tx_id}/query: get: tags: - - "transactions" + - "transactions" summary: "Delete/update queries for transactions" operationId: "getQueryTx" description: | This will add DELETE/UPDATE SQL query into transaction. 
This query UPDATEs/DELETEs documents from namespace parameters: - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true - - name: "q" - in: "query" - type: "string" - description: "SQL query" - required: true - - name: width - in: "query" - type: "integer" - description: "Total width in rows of view for table format output" - required: false - - name: "format" - in: "query" - type: "string" - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf + - name: "database" + in: "path" + type: "string" + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true + - name: "q" + in: "query" + type: "string" + description: "SQL query" + required: true + - name: width + in: "query" + type: "integer" + description: "Total width in rows of view for table format output" + required: false + - name: "format" + in: "query" + type: "string" + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf responses: 200: $ref: "#/responses/OK" @@ -1674,34 +1674,34 @@ paths: $ref: "#/responses/UnexpectedError" delete: tags: - - "transactions" + - "transactions" summary: "Delete documents from namespace (transactions)" operationId: "deleteQueryTx" description: | This will add DELETE query into transaction. DELETE query removes documents from namespace by DSL query. 
parameters: - - name: "database" - in: "path" - type: "string" - description: "Database name" - required: true - - name: "tx_id" - in: "path" - type: "string" - description: "transaction id" - required: true - - in: "body" - name: "body" - description: "DSL query" - required: true - schema: - $ref: "#/definitions/Query" - - name: "tx_id" - in: "query" - type: "string" - description: "transaction id" - required: false + - name: "database" + in: "path" + type: "string" + description: "Database name" + required: true + - name: "tx_id" + in: "path" + type: "string" + description: "transaction id" + required: true + - in: "body" + name: "body" + description: "DSL query" + required: true + schema: + $ref: "#/definitions/Query" + - name: "tx_id" + in: "query" + type: "string" + description: "transaction id" + required: false responses: 200: $ref: "#/responses/OK" @@ -1719,32 +1719,32 @@ paths: /db/{database}/suggest: get: tags: - - "queries" + - "queries" summary: "Suggest for autocompletion of SQL query" operationId: "getSuggest" description: | This operation pareses SQL query, and suggests autocompletion variants parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "q" - in: query - type: string - description: "SQL query" - required: true - - name: "pos" - in: query - type: integer - description: "Cursor position for suggest" - required: true - - name: "line" - in: query - type: integer - description: "Cursor line for suggest" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "q" + in: query + type: string + description: "SQL query" + required: true + - name: "pos" + in: query + type: integer + description: "Cursor position for suggest" + required: true + - name: "line" + in: query + type: integer + description: "Cursor line for suggest" + required: true responses: 200: description: "successful operation" @@ -1764,43 +1764,43 @@ paths: 
/db/{database}/sqlquery: post: tags: - - "queries" + - "queries" summary: "Query documents from namespace" operationId: "postSQLQuery" description: | This operation queries documents from namespace by SQL query. Query can be preceded by `EXPLAIN` statement, then query execution plan will be returned with query results. parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "q" - in: body - schema: + - name: "database" + in: path type: string - description: "SQL query" - required: true - - name: with_columns - in: query - type: boolean - description: "Return columns names and widths for table format output" - required: false - - name: width - in: query - type: integer - description: "Total width in rows of view for table format output" - required: false - - name: "format" - in: query - type: string - description: "encoding data format" - required: false - enum: - - json - - msgpack - - protobuf - - csv-file + description: "Database name" + required: true + - name: "q" + in: body + schema: + type: string + description: "SQL query" + required: true + - name: with_columns + in: query + type: boolean + description: "Return columns names and widths for table format output" + required: false + - name: width + in: query + type: integer + description: "Total width in rows of view for table format output" + required: false + - name: "format" + in: query + type: string + description: "encoding data format" + required: false + enum: + - json + - msgpack + - protobuf + - csv-file responses: 200: description: "successful operation" @@ -1820,7 +1820,7 @@ paths: /check: get: tags: - - "system" + - "system" summary: "Get system information" description: "This operation will return system information about server version, uptime, and resources consumption" operationId: "getSysInfo" @@ -1892,16 +1892,16 @@ paths: /db/{database}/namespaces/%23activitystats/items: get: tags: - - "system" + - "system" summary: "Get activity 
stats information" description: "This operation will return detailed information about current activity of all connected to the database clients" operationId: "getActivityStats" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: description: "successful operation" @@ -1921,16 +1921,16 @@ paths: /db/{database}/namespaces/%23clientsstats/items: get: tags: - - "system" + - "system" summary: "Get client connection information" description: "This operation will return detailed information about all connections on the server" operationId: "getClientsStats" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: description: "successful operation" @@ -1950,21 +1950,21 @@ paths: /db/{database}/namespaces/%23replicationstats/items: get: tags: - - "system" + - "system" summary: "Get replication statistics" description: "This operation will return detailed informatiom about replication status on this node or cluster" operationId: "getGlobalReplicationStats" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true - - name: "filter" - in: query - type: string - description: "Filter with SQL syntax, e.g: field1 = 'v1' AND field2 > 'v2'. Has to filter by 'type' field: either 'async' or 'cluster'" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true + - name: "filter" + in: query + type: string + description: "Filter with SQL syntax, e.g: field1 = 'v1' AND field2 > 'v2'. 
Has to filter by 'type' field: either 'async' or 'cluster'" + required: true responses: 200: description: "successful operation" @@ -1982,16 +1982,16 @@ paths: /db/{database}/namespaces/%23memstats/items: get: tags: - - "system" + - "system" summary: "Get memory stats information" description: "This operation will return detailed information about database memory consumption" operationId: "getMemStats" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: description: "successful operation" @@ -2011,16 +2011,16 @@ paths: /db/{database}/namespaces/%23perfstats/items: get: tags: - - "system" + - "system" summary: "Get performance stats information" description: "This operation will return detailed information about database performance timings. By default performance stats is turned off." operationId: "getPerfStats" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: description: "successful operation" @@ -2040,16 +2040,16 @@ paths: /db/{database}/namespaces/%23queriesperfstats/items: get: tags: - - "system" + - "system" summary: "Get SELECT queries performance stats information" description: "This operation will return detailed information about database memory consumption. By default quires performance stat is turned off." 
operationId: "getQueriesPerfStats" parameters: - - name: "database" - in: path - type: string - description: "Database name" - required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: description: "successful operation" @@ -2069,7 +2069,7 @@ paths: /db/{database}/namespaces/%23config/items: put: tags: - - "system" + - "system" summary: "Update system config" operationId: "putSystemConfig" description: | @@ -2077,16 +2077,16 @@ paths: - profiling configuration. It is used to enable recording of queries and overall performance; - log queries configuration. parameters: - - in: body - name: "body" - schema: - $ref: "#/definitions/SystemConfigItem" - required: true - - name: "database" - in: path - type: string - description: "Database name" - required: true + - in: body + name: "body" + schema: + $ref: "#/definitions/SystemConfigItem" + required: true + - name: "database" + in: path + type: string + description: "Database name" + required: true responses: 200: description: "successful operation" @@ -2215,13 +2215,13 @@ definitions: type: string description: "Current operation state" enum: - - "in_progress" - - "wait_lock" - - "sending" - - "indexes_lookup" - - "select_loop" - - "proxied_via_cluster_proxy" - - "proxied_via_sharding_proxy" + - "in_progress" + - "wait_lock" + - "sending" + - "indexes_lookup" + - "select_loop" + - "proxied_via_cluster_proxy" + - "proxied_via_sharding_proxy" lock_description: type: string @@ -2423,13 +2423,13 @@ definitions: type: object properties: total_items: - type: integer - description: "Total count of databases" + type: integer + description: "Total count of databases" items: - type: array - items: - description: "Name of database" - type: string + type: array + items: + description: "Name of database" + type: string Database: type: object @@ -2469,7 +2469,7 @@ definitions: description: "If true, then documents will be stored to disc storage, else all data will be 
lost on server shutdown" indexes: type: array - items: + items: $ref: "#/definitions/Index" Index: @@ -2480,7 +2480,7 @@ definitions: - field_type - index_type properties: - name: + name: description: "Name of index, can contains letters, digits and underscores" type: string pattern: "^[A-Za-z0-9_\\-]*$" @@ -2496,24 +2496,24 @@ definitions: type: string description: "Field data type" enum: - - "int" - - "int64" - - "double" - - "string" - - "bool" - - "composite" - - "point" + - "int" + - "int64" + - "double" + - "string" + - "bool" + - "composite" + - "point" index_type: type: string description: "Index structure type" default: "hash" enum: - - "hash" - - "tree" - - "text" - - "rtree" - - "ttl" - - "-" + - "hash" + - "tree" + - "text" + - "rtree" + - "ttl" + - "-" expire_after: description: "Specify, time to live for ttl index, in seconds" type: integer @@ -2536,10 +2536,10 @@ definitions: type: string description: "Algorithm to construct RTree index" enum: - - "linear" - - "quadratic" - - "greene" - - "rstar" + - "linear" + - "quadratic" + - "greene" + - "rstar" default: "rstar" is_simple_tag: description: "Use simple tag instead of actual index, which will notice rx about possible field name for strict policies" @@ -2550,10 +2550,10 @@ definitions: description: "String collate mode" default: "none" enum: - - "none" - - "ascii" - - "utf8" - - "numeric" + - "none" + - "ascii" + - "utf8" + - "numeric" sort_order_letters: type: string description: "Sort order letters" @@ -2586,10 +2586,10 @@ definitions: description: "Entity type" type: string enum: - - "object" - - "string" - - "number" - - "array" + - "object" + - "string" + - "number" + - "array" SchemaDef: $ref: "#/definitions/JsonObjectDef" @@ -2617,7 +2617,7 @@ definitions: description: "is updated value an array" values: type: "array" - items: + items: type: "object" description: "Values to update field with" @@ -2654,7 +2654,7 @@ definitions: filters: type: array description: "Filter for results documents" - 
items: + items: $ref: "#/definitions/FilterDef" sort: description: "Specifies results sorting order" @@ -2735,7 +2735,7 @@ definitions: filters: type: array description: "Filter for results documents" - items: + items: $ref: "#/definitions/FilterDef" sort: description: "Specifies results sorting order" @@ -2774,31 +2774,31 @@ definitions: type: string description: "Condition operator" enum: - - "EQ" # Equality (between field and value or between 2 fields) - - "GT" # Grower than (>) - - "GE" # Grower or equal (>=) - - "LE" # Less or equal (<=) - - "LT" # Less than (>) - - "SET" # field in (value1, value2, ...). Checks if value belongs to the set (or if 2 arrays has common values) - - "ALLSET" # Checks all the values from the first array (field) belong to the second array (value) - - "EMPTY" # Check if field is null - - "RANGE" # Check if field is >= value1 and <= value2 - - "LIKE" # Filter strings by give patter - - "DWITHIN" # Checks if point (from field) lies within given circle. Value must look like this: [[1, -3.5],5.0], where [1, -3.5] is the central point of the circle and 5.0 is the radius + - "EQ" # Equality (between field and value or between 2 fields) + - "GT" # Grower than (>) + - "GE" # Grower or equal (>=) + - "LE" # Less or equal (<=) + - "LT" # Less than (>) + - "SET" # field in (value1, value2, ...). Checks if value belongs to the set (or if 2 arrays has common values) + - "ALLSET" # Checks all the values from the first array (field) belong to the second array (value) + - "EMPTY" # Check if field is null + - "RANGE" # Check if field is >= value1 and <= value2 + - "LIKE" # Filter strings by give patter + - "DWITHIN" # Checks if point (from field) lies within given circle. Value must look like this: [[1, -3.5],5.0], where [1, -3.5] is the central point of the circle and 5.0 is the radius op: type: string description: "Logic operator" enum: - - "AND" - - "OR" - - "NOT" + - "AND" + - "OR" + - "NOT" value: description: "Value of filter. 
Single integer or string for EQ, GT, GE, LE, LT condition, array of 2 elements for RANGE condition, variable len array for SET and ALLSET conditions, or something like that: '[[1, -3.5],5.0]' for DWITHIN" type: object filters: type: array description: "Filter for results documents" - items: + items: $ref: "#/definitions/FilterDef" join_query: $ref: "#/definitions/JoinedDef" @@ -2842,7 +2842,7 @@ definitions: type: object required: - namespace - - on + - "on" - type properties: namespace: @@ -2852,13 +2852,13 @@ definitions: type: string description: "Join type" enum: - - "LEFT" - - "INNER" - - "ORINNER" + - "LEFT" + - "INNER" + - "ORINNER" filters: type: array description: "Filter for results documents" - items: + items: $ref: "#/definitions/FilterDef" sort: type: "array" @@ -2873,7 +2873,7 @@ definitions: on: type: array description: "Join ON statement" - items: + items: $ref: "#/definitions/OnDef" select_filter: description: "Filter fields of returned document. Can be dot separated, e.g 'subobject.field'" @@ -2898,19 +2898,19 @@ definitions: type: string description: "Condition operator" enum: - - "EQ" # Equality - - "GT" # Grower than (>) - - "GE" # Grower or equal (>=) - - "LE" # Less or equal (<=) - - "LT" # Less than (>) - - "SET" # X in (v1, v2, ...) + - "EQ" # Equality + - "GT" # Grower than (>) + - "GE" # Grower or equal (>=) + - "LE" # Less or equal (<=) + - "LT" # Less than (>) + - "SET" # X in (v1, v2, ...) op: type: string description: "Logic operator" enum: - - "AND" - - "OR" - - "NOT" + - "AND" + - "OR" + - "NOT" AggregationsDef: type: object @@ -2927,12 +2927,12 @@ definitions: type: string description: "Aggregation function" enum: - - "SUM" - - "AVG" - - "MIN" - - "MAX" - - "FACET" - - "DISTINCT" + - "SUM" + - "AVG" + - "MIN" + - "MAX" + - "FACET" + - "DISTINCT" sort: description: "Specifies results sorting order. 
Allowed only for FACET" type: array @@ -2940,7 +2940,7 @@ definitions: $ref: "#/definitions/AggregationsSortDef" limit: type: integer - default: UINT_MAX + default: 4294967295 minimum: 0 description: "Number of rows to get from result set. Allowed only for FACET" offset: @@ -2964,10 +2964,10 @@ definitions: type: string description: "Aggregation function" enum: - - "SUM" - - "AVG" - - "MIN" - - "MAX" + - "SUM" + - "AVG" + - "MIN" + - "MAX" AggregationsSortDef: description: "Specifies facet aggregations results sorting order" @@ -3032,7 +3032,7 @@ definitions: $ref: "#/definitions/FtStopWordObject" stemmers: type: array - default: ["en","ru"] + default: [ "en","ru" ] description: "List of stemmers to use" items: type: string @@ -3180,8 +3180,8 @@ definitions: type: string description: "Optimize the index by memory or by cpu" enum: - - "Memory" - - "CPU" + - "Memory" + - "CPU" default: "Memory" enable_preselect_before_ft: type: boolean @@ -3289,8 +3289,8 @@ definitions: type: string description: "Text tokenization algorithm. 'fast' - splits text by spaces, special characters and unsupported UTF-8 symbols. Each token is a combination of letters from supported UTF-8 subset, numbers and extra word symbols. 
'mmseg_cn' - algorithm based on friso implementation of mmseg for Chinese and English" enum: - - "fast" - - "mmseg_cn" + - "fast" + - "mmseg_cn" default: "fast" FulltextFieldConfig: @@ -3497,7 +3497,7 @@ definitions: general_sort_us: type: integer description: "Result sort time" - sort_index: + sort_index: type: string description: "Index, which used for sort results" sort_by_uncommitted_index: @@ -3513,10 +3513,10 @@ definitions: type: string description: "Method, used to process condition" enum: - - "scan" - - "index" - - "inner_join" - - "left_join" + - "scan" + - "index" + - "inner_join" + - "left_join" field: type: string description: "Field or index name" @@ -3548,20 +3548,20 @@ definitions: type: string description: "Select iterator type" enum: - - "Comparator" - - "TwoFieldsComparison" - - "Skipped" - - "Forward" - - "Reverse" - - "SingleRange" - - "SingleIdset" - - "SingleIdSetWithDeferedSort" - - "RevSingleRange" - - "RevSingleIdset" - - "RevSingleIdSetWithDeferedSort" - - "OnlyComparator" - - "Unsorted" - - "UnbuiltSortOrdersIndex" + - "Comparator" + - "TwoFieldsComparison" + - "Skipped" + - "Forward" + - "Reverse" + - "SingleRange" + - "SingleIdset" + - "SingleIdSetWithDeferedSort" + - "RevSingleRange" + - "RevSingleIdset" + - "RevSingleIdSetWithDeferedSort" + - "OnlyComparator" + - "Unsorted" + - "UnbuiltSortOrdersIndex" description: type: string description: "Description of the selector" @@ -3617,9 +3617,9 @@ definitions: type: string description: Optional. 
Aggregation type used in subquery enum: - - "min" - - "max" - - "distinct" + - "min" + - "max" + - "distinct" success: type: boolean description: result of injection attempt @@ -3667,12 +3667,12 @@ definitions: type: string description: "Aggregation function" enum: - - "SUM" - - "AVG" - - "MIN" - - "MAX" - - "FACET" - - "DISTINCT" + - "SUM" + - "AVG" + - "MIN" + - "MAX" + - "FACET" + - "DISTINCT" value: type: number description: "Value, calculated by aggregator" @@ -3690,7 +3690,7 @@ definitions: description: "Facet fields values" items: type: string - count: + count: type: integer description: "Count of elements these fields values" distincts: @@ -3764,7 +3764,7 @@ definitions: properties: name: type: string - description: "Name of namespace" + description: "Name of namespace" items_count: type: integer description: "Total count of documents in namespace" @@ -3858,17 +3858,17 @@ definitions: JoinCacheMemStats: description: "Join cache stats. Stores results of selects to right table by ON condition" - allOf: + allOf: - $ref: "#/definitions/CacheMemStats" QueryCacheMemStats: description: "Query cache stats. Stores results of SELECT COUNT(*) by Where conditions" - allOf: + allOf: - $ref: "#/definitions/CacheMemStats" IndexCacheMemStats: description: "Idset cache stats. Stores merged reverse index results of SELECT field IN(...) by IN(...) 
keys" - allOf: + allOf: - $ref: "#/definitions/CacheMemStats" CacheMemStats: @@ -3919,18 +3919,18 @@ definitions: type: object description: "State of current master namespace" properties: - data_hash: - type: integer - description: "Hashsum of all records in namespace" - last_lsn: - type: integer - description: "Last Log Sequence Number (LSN) of applied namespace modification" - updated_unix_nano: - type: integer - description: "Last update time" - data_count: - type: integer - description: "Items count in master namespace" + data_hash: + type: integer + description: "Hashsum of all records in namespace" + last_lsn: + type: integer + description: "Last Log Sequence Number (LSN) of applied namespace modification" + updated_unix_nano: + type: integer + description: "Last update time" + data_count: + type: integer + description: "Items count in master namespace" incarnation_counter: type: integer description: "Number of storage's master <-> slave switches" @@ -3967,16 +3967,22 @@ definitions: properties: name: type: string - description: "Name of namespace" + description: "Name of namespace" updates: $ref: "#/definitions/UpdatePerfStats" selects: $ref: "#/definitions/SelectPerfStats" transactions: $ref: "#/definitions/TransactionsPerfStats" + join_cache: + description: "Joins cache statistics" + $ref: "#/definitions/LRUCachePerfStats" + query_count_cache: + description: "Queries cache statistics (for the queries with COUNT_CACHED() aggregation)" + $ref: "#/definitions/LRUCachePerfStats" indexes: type: array - description: "Memory consumption of each namespace index" + description: "Performance statistics for each namespace index" items: type: object properties: @@ -3987,7 +3993,10 @@ definitions: $ref: "#/definitions/UpdatePerfStats" selects: $ref: "#/definitions/SelectPerfStats" - + cache: + description: "If index does not use IDs cache at all, this struct won't be present in response" + $ref: "#/definitions/LRUCachePerfStats" + CommonPerfStats: type: object 
properties: @@ -4021,12 +4030,12 @@ definitions: UpdatePerfStats: description: "Performance statistics for update operations" - allOf: + allOf: - $ref: "#/definitions/CommonPerfStats" SelectPerfStats: description: "Performance statistics for select operations" - allOf: + allOf: - $ref: "#/definitions/CommonPerfStats" TransactionsPerfStats: @@ -4090,7 +4099,7 @@ definitions: QueryPerfStats: description: "Performance statistics per each query" - allOf: + allOf: - $ref: "#/definitions/CommonPerfStats" - type: object properties: @@ -4101,18 +4110,34 @@ definitions: type: string description: "not normalized SQL representation of longest query" + LRUCachePerfStats: + description: "Performance statistics for specific LRU-cache instance" + properties: + total_queries: + description: "Queries total count" + type: integer + minimum: 0 + cache_hit_rate: + description: "Cache hit rate (hits / total_queries)" + type: number + minimum: 0.0 + maximum: 1.0 + is_active: + description: "Determines if cache is currently in use. 
Usually it has 'false' value for uncommited indexes" + type: boolean + SystemConfigItem: type: object required: - type properties: - type: + type: type: string enum: - - profiling - - namespaces - - replication - - action + - profiling + - namespaces + - replication + - action default: "profiling" profiling: $ref: "#/definitions/ProfilingConfig" @@ -4155,15 +4180,15 @@ definitions: $ref: "#/definitions/LongQueriesLogging" LongQueriesLogging: - type: object - description: "Parameters for logging long queries and transactions" - properties: - select: - $ref: "#/definitions/SelectLogging" - update_delete: - $ref: "#/definitions/UpdateDeleteLogging" - transaction: - $ref: "#/definitions/TransactionLogging" + type: object + description: "Parameters for logging long queries and transactions" + properties: + select: + $ref: "#/definitions/SelectLogging" + update_delete: + $ref: "#/definitions/UpdateDeleteLogging" + transaction: + $ref: "#/definitions/TransactionLogging" SelectLogging: type: object @@ -4178,16 +4203,16 @@ definitions: default: false UpdateDeleteLogging: - type: object - properties: - threshold_us: - type: integer - description: "Threshold value for logging UPDATE and DELETE queries, if -1 logging is disabled" - default: -1 - normalized: - type: boolean - description: "Output the query in a normalized form" - default: false + type: object + properties: + threshold_us: + type: integer + description: "Threshold value for logging UPDATE and DELETE queries, if -1 logging is disabled" + default: -1 + normalized: + type: boolean + description: "Output the query in a normalized form" + default: false TransactionLogging: type: object @@ -4203,7 +4228,7 @@ definitions: NamespacesConfig: type: object - properties: + properties: namespace: type: string description: "Name of namespace, or `*` for setting to all namespaces" @@ -4278,6 +4303,13 @@ definitions: default: 20000 minimum: 0 description: "Enables synchronous storage flush inside write-calls, if async updates 
count is more than sync_storage_flush_limit. 0 - disables synchronous storage flush, in this case storage will be flushed in background thread only" + strict_mode: + description: "Strict mode for queries. Adds additional check for fields('names')/indexes('indexes') existence in sorting and filtering conditions" + type: string + enum: + - "none" + - "names" + - "indexes" cache: type: object properties: @@ -4329,7 +4361,7 @@ definitions: server_id: type: integer default: 0 - minimun: 0 + minimum: 0 maximum: 999 description: "Node identifier. Should be unique for each node in the replicated cluster (non-unique IDs are also allowed, but may lead to the inconsistency in some cases" cluster_id: @@ -4342,7 +4374,7 @@ definitions: - role - nodes - namespaces - properties: + properties: role: type: string description: "Replication role" @@ -4427,7 +4459,7 @@ definitions: type: object required: - command - properties: + properties: command: type: string description: "Command to execute" @@ -4452,9 +4484,9 @@ definitions: type: "string" description: "User role" enum: - - "owner" - - "db_admin" - - "data_write" - - "data_read" - - "none" - - "unauthoried" \ No newline at end of file + - "owner" + - "db_admin" + - "data_write" + - "data_read" + - "none" + - "unauthoried" diff --git a/cpp_src/server/grpc/CMakeLists.txt b/cpp_src/server/grpc/CMakeLists.txt index 1b88fd1ce..1714ee918 100644 --- a/cpp_src/server/grpc/CMakeLists.txt +++ b/cpp_src/server/grpc/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.10) project(reindexer_grpc_library) set (TARGET reindexer_grpc_library) diff --git a/cpp_src/server/grpc/reindexerservice.cc b/cpp_src/server/grpc/reindexerservice.cc index 1f4738e55..7a163b86a 100644 --- a/cpp_src/server/grpc/reindexerservice.cc +++ b/cpp_src/server/grpc/reindexerservice.cc @@ -624,7 +624,11 @@ Error ReindexerService::executeQuery(const std::string& dbName, const Query& que reindexer::Query q; switch 
(query.encdoingtype()) { case EncodingType::JSON: - status = q.FromJSON(query.data()); + try { + q = reindexer::Query::FromJSON(query.data()); + } catch (reindexer::Error& err) { + status = std::move(err); + } break; case EncodingType::MSGPACK: // TODO: merge from appropriate MR diff --git a/cpp_src/server/httpserver.cc b/cpp_src/server/httpserver.cc index 2917ede2d..fb01ea0e1 100644 --- a/cpp_src/server/httpserver.cc +++ b/cpp_src/server/httpserver.cc @@ -169,13 +169,14 @@ int HTTPServer::PostQuery(http::Context& ctx) { std::string dsl = ctx.body->Read(); reindexer::Query q; - auto err = q.FromJSON(dsl); - if (!err.ok()) { + try { + q = Query::FromJSON(dsl); + } catch (Error& err) { return jsonStatus(ctx, http::HttpStatus(err)); } reindexer::ActiveQueryScope scope(q, QuerySelect); - err = db.Select(q, res); + auto err = db.Select(q, res); if (!err.ok()) { return jsonStatus(ctx, http::HttpStatus(err)); } @@ -187,14 +188,15 @@ int HTTPServer::DeleteQuery(http::Context& ctx) { std::string dsl = ctx.body->Read(); reindexer::Query q; - auto status = q.FromJSON(dsl); - if (!status.ok()) { - return jsonStatus(ctx, http::HttpStatus(status)); + try { + q = Query::FromJSON(dsl); + } catch (Error& err) { + return jsonStatus(ctx, http::HttpStatus(err)); } reindexer::ActiveQueryScope scope(q, QueryDelete); reindexer::QueryResults res; - status = db.Delete(q, res); + auto status = db.Delete(q, res); if (!status.ok()) { return jsonStatus(ctx, http::HttpStatus(status)); } @@ -211,14 +213,15 @@ int HTTPServer::UpdateQuery(http::Context& ctx) { std::string dsl = ctx.body->Read(); reindexer::Query q; - auto status = q.FromJSON(dsl); - if (!status.ok()) { - return jsonStatus(ctx, http::HttpStatus(status)); + try { + q = Query::FromJSON(dsl); + } catch (Error& err) { + return jsonStatus(ctx, http::HttpStatus(err)); } reindexer::ActiveQueryScope scope(q, QueryUpdate); reindexer::QueryResults res; - status = db.Update(q, res); + auto status = db.Update(q, res); if (!status.ok()) { 
return jsonStatus(ctx, http::HttpStatus(status)); } @@ -2084,9 +2087,10 @@ int HTTPServer::DeleteQueryTx(http::Context& ctx) { std::string dsl = ctx.body->Read(); reindexer::Query q; - auto ret = q.FromJSON(dsl); - if (!ret.ok()) { - return jsonStatus(ctx, http::HttpStatus(ret)); + try { + q = Query::FromJSON(dsl); + } catch (Error& err) { + return jsonStatus(ctx, http::HttpStatus(err)); } reindexer::QueryResults res; std::string txId = urldecode2(ctx.request->urlParams[1]); diff --git a/cpp_src/server/rpcserver.cc b/cpp_src/server/rpcserver.cc index e7c024eef..0c9da667b 100644 --- a/cpp_src/server/rpcserver.cc +++ b/cpp_src/server/rpcserver.cc @@ -845,7 +845,7 @@ RPCQrWatcher::Ref RPCServer::createQueryResults(cproto::Context& ctx, RPCQrId& i } } - if rx_unlikely (data->results.size() >= cproto::kMaxConcurentQueries) { + if rx_unlikely (data->results.size() >= cproto::kMaxConcurrentQueries) { for (unsigned idx = 0; idx < data->results.size(); ++idx) { RPCQrId tmpQrId{data->results[idx].main, data->results[idx].uid}; assertrx(tmpQrId.main >= 0); diff --git a/cpp_src/server/vendor/prometheus/impl/check_names.cc b/cpp_src/server/vendor/prometheus/impl/check_names.cc index 9ba1e9e0b..0cac3a0a1 100644 --- a/cpp_src/server/vendor/prometheus/impl/check_names.cc +++ b/cpp_src/server/vendor/prometheus/impl/check_names.cc @@ -4,8 +4,8 @@ #if defined(__GLIBCXX__) && __GLIBCXX__ <= 20150623 #define STD_REGEX_IS_BROKEN #endif -#if defined(__GNUC__) && (__GNUC__ == 12) && (__GNUC_MINOR__ < 4) && defined(REINDEX_WITH_ASAN) -// regex header is broken in GCC 12.0-12.3 with ASAN +#if defined(__GNUC__) && ((__GNUC__ == 12) || (__GNUC__ == 13)) && defined(REINDEX_WITH_ASAN) +// regex header is broken in GCC 12.0-13.3 with ASAN #define STD_REGEX_IS_BROKEN #endif #if defined(_MSC_VER) && _MSC_VER < 1900 @@ -20,7 +20,9 @@ namespace prometheus { bool CheckMetricName(const std::string& name) { // see https://prometheus.io/docs/concepts/data_model/ auto reserved_for_internal_purposes = 
name.compare(0, 2, "__") == 0; - if (reserved_for_internal_purposes) return false; + if (reserved_for_internal_purposes) { + return false; + } #ifdef STD_REGEX_IS_BROKEN return !name.empty(); #else @@ -32,7 +34,9 @@ bool CheckMetricName(const std::string& name) { bool CheckLabelName(const std::string& name) { // see https://prometheus.io/docs/concepts/data_model/ auto reserved_for_internal_purposes = name.compare(0, 2, "__") == 0; - if (reserved_for_internal_purposes) return false; + if (reserved_for_internal_purposes) { + return false; + } #ifdef STD_REGEX_IS_BROKEN return !name.empty(); #else diff --git a/cpp_src/tools/errors.cc b/cpp_src/tools/errors.cc index 07d095df8..a21767c4a 100644 --- a/cpp_src/tools/errors.cc +++ b/cpp_src/tools/errors.cc @@ -1,7 +1,16 @@ #include "errors.h" +#include "debug/backtrace.h" namespace reindexer { +void print_backtrace_and_abort(std::string&& msg) { + std::cerr << msg; + reindexer::debug::print_backtrace(std::cerr, nullptr, -1); + reindexer::debug::print_crash_query(std::cerr); + reindexer::debug::backtrace_set_assertion_message(std::move(msg)); + abort(); +} + const Error::WhatPtr Error::defaultErrorText_{make_intrusive("Error text generation failed")}; std::ostream& operator<<(std::ostream& os, const Error& error) { diff --git a/cpp_src/tools/errors.h b/cpp_src/tools/errors.h index 6b8f50a3a..4d712b6bb 100644 --- a/cpp_src/tools/errors.h +++ b/cpp_src/tools/errors.h @@ -7,39 +7,35 @@ #include "estl/intrusive_ptr.h" #ifdef REINDEX_CORE_BUILD -#include "debug/backtrace.h" #include "fmt/printf.h" #endif // REINDEX_CORE_BUILD namespace reindexer { +[[noreturn]] void print_backtrace_and_abort(std::string&& msg); + #if defined(REINDEX_CORE_BUILD) template -void assertf_fmt(const char* fmt, const Args&... args) { - fmt::fprintf(stderr, fmt, args...); +std::string assertf_fmt(const char* fmt, const Args&... args) { + return fmt::sprintf(fmt, args...); } #if defined(NDEBUG) #define assertf(...) 
((void)0) #else // Using (void)fmt here to force ';' usage after the macro -#define assertf(e, fmt, ...) \ - if rx_unlikely (!(e)) { \ - reindexer::assertf_fmt("%s:%d: failed assertion '%s':\n" fmt, __FILE__, __LINE__, #e, __VA_ARGS__); \ - reindexer::debug::print_backtrace(std::cerr, nullptr, -1); \ - reindexer::debug::print_crash_query(std::cerr); \ - abort(); \ - } \ +#define assertf(e, fmt, ...) \ + if rx_unlikely (!(e)) { \ + reindexer::print_backtrace_and_abort( \ + reindexer::assertf_fmt("%s:%d: failed assertion '%s':\n" fmt, __FILE__, __LINE__, #e, __VA_ARGS__)); \ + } \ (void)fmt #endif // NDEBUG #ifdef RX_WITH_STDLIB_DEBUG -#define assertf_dbg(e, fmt, ...) \ - if rx_unlikely (!(e)) { \ - reindexer::assertf_fmt("%s:%d: failed assertion '%s':\n" fmt, __FILE__, __LINE__, #e, __VA_ARGS__); \ - reindexer::debug::print_backtrace(std::cerr, nullptr, -1); \ - reindexer::debug::print_crash_query(std::cerr); \ - abort(); \ - } \ +#define assertf_dbg(e, fmt, ...) \ + if rx_unlikely (!(e)) { \ + print_backtrace_and_abort(reindexer::assertf_fmt("%s:%d: failed assertion '%s':\n" fmt, __FILE__, __LINE__, #e, __VA_ARGS__)); \ + } \ (void)fmt #else // RX_WITH_STDLIB_DEBUG #define assertf_dbg(...) ((void)0) diff --git a/cpp_src/tools/fsops.cc b/cpp_src/tools/fsops.cc index d76ba7f5d..f14401fe4 100644 --- a/cpp_src/tools/fsops.cc +++ b/cpp_src/tools/fsops.cc @@ -178,7 +178,8 @@ int RmDirAll(const std::string& path) noexcept { std::string GetCwd() { char buff[FILENAME_MAX]; - return std::string(getcwd(buff, FILENAME_MAX)); + auto cwd = getcwd(buff, FILENAME_MAX); + return cwd ? 
std::string(cwd) : std::string(); } static std::string tmpDir; diff --git a/cpp_src/tools/lsn.h b/cpp_src/tools/lsn.h index 8aa340e36..1d402825d 100644 --- a/cpp_src/tools/lsn.h +++ b/cpp_src/tools/lsn.h @@ -17,14 +17,13 @@ struct LSNUnpacked { int64_t counter; }; -struct lsn_t { +class lsn_t { private: - static constexpr int16_t kMinServerIDValue = 0; - static constexpr int16_t kMaxServerIDValue = 999; - static constexpr int64_t kMaxCounter = 1000000000000000ll; public: + static constexpr int16_t kMinServerIDValue = 0; + static constexpr int16_t kMaxServerIDValue = 999; static constexpr int64_t kDefaultCounter = kMaxCounter - 1; void GetJSON(JsonBuilder& builder) const; @@ -75,10 +74,10 @@ struct lsn_t { } int64_t SetCounter(int64_t counter) { validateCounter(counter); - payload_ = Server() * kMaxCounter + counter; if (counter < 0) { counter = kDefaultCounter; } + payload_ = Server() * kMaxCounter + counter; return payload_; } int64_t Counter() const noexcept { return payload_ % kMaxCounter; } diff --git a/cpp_src/tools/stringstools.cc b/cpp_src/tools/stringstools.cc index 17077f159..1d83deef4 100644 --- a/cpp_src/tools/stringstools.cc +++ b/cpp_src/tools/stringstools.cc @@ -1,12 +1,11 @@ #include #include -#include #include "atoi/atoi.h" -#include "fmt/compile.h" #include "core/keyvalue/key_string.h" #include "core/keyvalue/uuid.h" #include "estl/one_of.h" +#include "fmt/compile.h" #include "frozen_str_tools.h" #include "itoa/itoa.h" #include "stringstools.h" @@ -172,8 +171,11 @@ Variant stringToVariant(std::string_view value) { return kvt.EvaluateOneOf([value](KeyValueType::Int64) { return Variant(int64_t(stoll(value))); }, [value](KeyValueType::Int) { return Variant(int(stoi(value))); }, [value](KeyValueType::Double) { - char* p = nullptr; - return Variant(double(strtod(value.data(), &p))); + using double_conversion::StringToDoubleConverter; + static const StringToDoubleConverter converter{StringToDoubleConverter::NO_FLAGS, NAN, NAN, nullptr, + nullptr}; + int 
countOfCharsParsedAsDouble = 0; + return Variant(converter.StringToDouble(value.data(), value.size(), &countOfCharsParsedAsDouble)); }, [value](KeyValueType::String) { return Variant(make_key_string(value.data(), value.length())); }, [value](KeyValueType::Bool) noexcept { return (value.size() == 4) ? Variant(true) : Variant(false); }, @@ -419,13 +421,38 @@ ComparationResult collateCompare(std::string_view lhs, std::string_ return ComparationResult::Eq; } +static long int strntol(std::string_view str, const char** end, int base) noexcept { + char buf[24]; + long int ret; + const char* beg = str.data(); + auto sz = str.size(); + for (; beg && sz && *beg == ' '; beg++, sz--); + assertrx_dbg(end); + + if (!sz || sz >= sizeof(buf)) { + *end = str.data(); + return 0; + } + + // beg can not be null if sz != 0 + // NOLINTNEXTLINE(clang-analyzer-core.NonNullParamChecker) + std::memcpy(buf, beg, sz); + buf[sz] = '\0'; + ret = std::strtol(buf, const_cast(end), base); + if (ret == LONG_MIN || ret == LONG_MAX) { + return ret; + } + *end = str.data() + (*end - buf); + return ret; +} + template <> ComparationResult collateCompare(std::string_view lhs, std::string_view rhs, const SortingPrioritiesTable&) noexcept { - char* posl = nullptr; - char* posr = nullptr; + const char* posl = nullptr; + const char* posr = nullptr; - int numl = strtol(lhs.data(), &posl, 10); - int numr = strtol(rhs.data(), &posr, 10); + int numl = strntol(lhs, &posl, 10); + int numr = strntol(rhs, &posr, 10); if (numl == numr) { auto minlen = std::min(lhs.size() - (posl - lhs.data()), rhs.size() - (posr - rhs.data())); diff --git a/cpp_src/tools/varint.h b/cpp_src/tools/varint.h index 1fbddd201..65314aaf2 100644 --- a/cpp_src/tools/varint.h +++ b/cpp_src/tools/varint.h @@ -260,5 +260,5 @@ inline unsigned scan_varint(unsigned len, const uint8_t* data) noexcept { #ifndef _MSC_VER #pragma GCC diagnostic pop #else -#pragma warning(push) +#pragma warning(pop) #endif diff --git a/cpp_src/wal/walrecord.h 
b/cpp_src/wal/walrecord.h index 715287938..d2741cb78 100644 --- a/cpp_src/wal/walrecord.h +++ b/cpp_src/wal/walrecord.h @@ -2,13 +2,14 @@ #include #include -#include #include #include #include "core/keyvalue/p_string.h" #include "estl/chunk.h" #include "estl/h_vector.h" +#include "estl/intrusive_ptr.h" #include "estl/span.h" +#include "tools/lsn.h" namespace reindexer { @@ -105,11 +106,17 @@ struct PackedWALRecord : public h_vector { #pragma pack(push, 1) struct MarkedPackedWALRecord : public PackedWALRecord { - MarkedPackedWALRecord() = default; + MarkedPackedWALRecord(int16_t s) noexcept : server{s} { + assertrx_dbg(server >= lsn_t::kMinServerIDValue); + assertrx_dbg(server <= lsn_t::kMaxServerIDValue); + } template - MarkedPackedWALRecord(int16_t s, RecordT&& rec) : PackedWALRecord(std::forward(rec)), server(s) {} + MarkedPackedWALRecord(int16_t s, RecordT&& rec) : PackedWALRecord(std::forward(rec)), server(s) { + assertrx_dbg(server >= lsn_t::kMinServerIDValue); + assertrx_dbg(server <= lsn_t::kMaxServerIDValue); + } - int16_t server; + int16_t server = -1; void Pack(int16_t _serverId, const WALRecord& rec); }; #pragma pack(pop) diff --git a/cpp_src/wal/walselecter.cc b/cpp_src/wal/walselecter.cc index bfaf08b17..b8b1e819f 100644 --- a/cpp_src/wal/walselecter.cc +++ b/cpp_src/wal/walselecter.cc @@ -3,11 +3,8 @@ #include "core/formatters/lsn_fmt.h" #include "core/namespace/namespaceimpl.h" #include "core/nsselecter/nsselecter.h" -#include "core/rdxcontext.h" #include "tools/semversion.h" -#include "tools/logger.h" - namespace reindexer { const SemVersion kMinUnknownReplSupportRxVersion("2.6.0"); @@ -33,7 +30,7 @@ void WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool for (size_t i = 0; i < q.Entries().Size(); ++i) { q.Entries().Visit( i, - [&lsnIdx, &versionIdx, i] RX_PRE_LMBD_ALWAYS_INLINE(const QueryEntry& qe) RX_POST_LMBD_ALWAYS_INLINE { + [&lsnIdx, &versionIdx, i](const QueryEntry& qe) { if ("#lsn"sv == qe.FieldName()) { lsnIdx = i; 
} else if ("#slave_version"sv == qe.FieldName()) { @@ -42,8 +39,7 @@ void WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool throw Error(errLogic, "Unexpected index in WAL select query: %s", qe.FieldName()); } }, - [&q] RX_PRE_LMBD_ALWAYS_INLINE(const auto&) - RX_POST_LMBD_ALWAYS_INLINE { throw Error(errLogic, "Unexpected WAL select query: %s", q.GetSQL()); }); + [&q](const auto&) { throw Error(errLogic, "Unexpected WAL select query: %s", q.GetSQL()); }); } auto slaveVersion = versionIdx < 0 ? SemVersion() : SemVersion(q.Entries().Get(versionIdx).Values()[0].As()); auto& lsnEntry = q.Entries().Get(lsnIdx); @@ -68,7 +64,7 @@ void WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool // Put as ItemRef with raw container PayloadValue pv(data.size(), data.data()); pv.SetLSN(it.GetLSN()); - result.Add(ItemRef(rec.id, pv, 0, 0, true)); + result.AddItemRef(rec.id, std::move(pv), 0, 0, true); }; const auto firstIt = lsnEntry.Condition() == CondGt ? ns_->wal_.upper_bound(fromLSN) : ns_->wal_.inclusive_upper_bound(fromLSN); if (firstIt != walEnd) { @@ -96,7 +92,7 @@ void WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool // Put as usual ItemRef [[maybe_unused]] const auto iLSN = lsn_t(ns_->items_[rec.id].GetLSN()); assertf(iLSN.Counter() == (lsn_t(it.GetLSN()).Counter()), "lsn %s != %s, ns=%s", iLSN, it.GetLSN(), ns_->name_); - result.Add(ItemRef(rec.id, ns_->items_[rec.id])); + result.AddItemRef(rec.id, ns_->items_[rec.id]); count--; } result.totalCount++; @@ -134,9 +130,10 @@ void WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool break; case WalEmpty: if (snapshot) { + // We have to store empty records in snapshot to preserve original server IDs assertrx(!start); assertrx(count < 0); - putWalRecord(it, rec); // TODO: Check if it's possible to remove empty records from, snapshot + putWalRecord(it, rec); } break; case WalReplState: @@ -160,7 +157,7 @@ void 
WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool wr.Pack(wrec); PayloadValue val(wr.size(), wr.data()); val.SetLSN(lsn_t()); - result.Add(ItemRef(-1, val, 0, 0, true)); + result.AddItemRef(-1, std::move(val), 0, 0, true); }; for (unsigned int i = 1; i < ns_->indexes_.size(); i++) { auto indexDef = ns_->getIndexDefinition(i); @@ -189,7 +186,7 @@ void WALSelecter::operator()(LocalQueryResults& result, SelectCtx& params, bool if (start) { start--; } else if (count) { - result.Add(ItemRef(id, ns_->items_[id])); + result.AddItemRef(id, ns_->items_[id]); count--; } result.totalCount++; @@ -214,6 +211,6 @@ void WALSelecter::putReplState(LocalQueryResults& result) { // Put as ItemRef with raw container PayloadValue pv(wr.size(), wr.data()); pv.SetLSN(lsn_t()); - result.Add(ItemRef(-1, pv, 0, 0, true)); + result.AddItemRef(-1, std::move(pv), 0, 0, true); } } // namespace reindexer diff --git a/cpp_src/wal/waltracker.cc b/cpp_src/wal/waltracker.cc index da64b927f..0357a5fab 100644 --- a/cpp_src/wal/waltracker.cc +++ b/cpp_src/wal/waltracker.cc @@ -123,7 +123,7 @@ void WALTracker::Init(int64_t sz, int64_t minLSN, int64_t maxLSN, AsyncStorage& void WALTracker::put(lsn_t lsn, const WALRecord& rec) { int64_t pos = lsn.Counter() % walSize_; if (pos >= int64_t(records_.size())) { - records_.resize(uint64_t(pos + 1)); + records_.resize(uint64_t(pos + 1), GetServer()); } heapSize_ -= records_[pos].heap_size(); @@ -201,10 +201,10 @@ std::vector> WALTracker::readFromStorage(int64_t& void WALTracker::initPositions(int64_t sz, int64_t minLSN, int64_t maxLSN) { int64_t counter = maxLSN + 1; lsnCounter_.SetCounter(counter); - lastLsn_ = lsn_t(maxLSN, lsnCounter_.Server()); + lastLsn_ = lsn_t(maxLSN, GetServer()); walSize_ = sz; records_.clear(); - records_.resize(std::min(counter, walSize_)); + records_.resize(std::min(counter, walSize_), GetServer()); heapSize_ = 0; if (minLSN == std::numeric_limits::max() || !walSize_) { walOffset_ = 0; @@ -219,6 +219,7 @@ void 
WALTracker::initPositions(int64_t sz, int64_t minLSN, int64_t maxLSN) { template lsn_t WALTracker::add(RecordT&& rec, lsn_t originLsn, bool toStorage, lsn_t oldLsn) { + const auto localServerID = GetServer(); lsn_t lsn = originLsn; if (lsn.isEmpty()) { lsn = lsnCounter_++; @@ -229,7 +230,7 @@ lsn_t WALTracker::add(RecordT&& rec, lsn_t originLsn, bool toStorage, lsn_t oldL auto newCounter = lsn.Counter(); if (lsnCounter_.Counter() == 0) { // If there are no WAL records and we've got record with some large LSN assertrx(records_.empty()); - records_.resize(newCounter % walSize_); + records_.resize(newCounter % walSize_, localServerID); } lsnCounter_.SetCounter(newCounter + 1); } @@ -242,7 +243,7 @@ lsn_t WALTracker::add(RecordT&& rec, lsn_t originLsn, bool toStorage, lsn_t oldL put(lastLsn_, std::forward(rec)); if (!oldLsn.isEmpty() && available(oldLsn)) { put(oldLsn, WALRecord()); - if (oldLsn.Server()) { + if (oldLsn.Server() != localServerID) { writeToStorage(oldLsn); // Write empty record to the storage to preserve it's server ID } } diff --git a/describer.go b/describer.go index 3b25ccad5..4f9d3179d 100644 --- a/describer.go +++ b/describer.go @@ -117,8 +117,6 @@ func CreateInt64FromLSN(v LsnT) int64 { type NamespaceMemStat struct { // Name of namespace Name string `json:"name"` - // [[deperecated]]. do not use - StorageError string `json:"storage_error"` // Filesystem path to namespace storage StoragePath string `json:"storage_path"` // Status of disk storage (true, if storage is enabled and writable) @@ -262,6 +260,29 @@ type TxPerfStat struct { MaxCopyTimeUs int64 `json:"max_copy_time_us"` } +// LRUCachePerfStat is information about LRU cache efficiency +type LRUCachePerfStat struct { + // Total queries to cache + TotalQueries uint64 `json:"total_queries"` + // Cache hit rate (CacheHits / TotalQueries) + CacheHitRate float64 `json:"cache_hit_rate"` + // Determines if cache is currently in use. 
Usually it has 'false' value for uncommited indexes + IsActive bool `json:"is_active"` +} + +// IndexPerfStat is information about specific index performance statistics +type IndexPerfStat struct { + // Name of index + Name string `json:"name"` + // Performance statistics for index commit operations + Commits PerfStat `json:"commits"` + // Performance statistics for index select operations + Selects PerfStat `json:"selects"` + // Performance statistics for LRU IdSets index cache (or fulltext cache for text indexes). + // Nil-value means, that index does not use cache at all + Cache *LRUCachePerfStat `json:"cache,omitempty"` +} + // NamespacePerfStat is information about namespace's performance statistics // and located in '#perfstats' system namespace type NamespacePerfStat struct { @@ -273,6 +294,12 @@ type NamespacePerfStat struct { Selects PerfStat `json:"selects"` // Performance statistics for transactions Transactions TxPerfStat `json:"transactions"` + // Performance statistics for JOINs cache + JoinCache LRUCachePerfStat `json:"join_cache"` + // Performance statistics for CountCached aggregation cache + QueryCountCache LRUCachePerfStat `json:"query_count_cache"` + // Performance statistics for each namespace index + Indexes IndexPerfStat `json:"indexes"` } // ClientConnectionStat is information about client connection @@ -505,6 +532,10 @@ type DBNamespacesConfig struct { // 0 - disables synchronous storage flush. In this case storage will be flushed in background thread only // Default value is 20000 SyncStorageFlushLimit int `json:"sync_storage_flush_limit"` + // Strict mode for queries. 
Adds additional check for fields('names')/indexes('indexes') existence in sorting and filtering conditions + // Default value - 'names' + // Possible values: 'indexes','names','none' + StrictMode string `json:"strict_mode,omitempty"` + // Namespaces' cache configs CacheConfig *NamespaceCacheConfig `json:"cache,omitempty"` } diff --git a/fulltext.md b/fulltext.md index d8065fd97..274ece7a7 100644 --- a/fulltext.md +++ b/fulltext.md @@ -420,7 +420,7 @@ If the list of stopwords looks like this: ``` and there are pair of documents containing this word: `{"...under the roof ..."}, {"... to understand and forgive..."}`. Then for the query 'under*' we will get as a result only document `{"... to understand and forgive..."}` and for the query 'under' we will get nothing as a result. -If the "StopWords" section is not specified in the config, then the [default](./cpp_src/core/ft/stopwords/stop_en.cc) stopwords list will be used, and if it is explicitly specified empty, it means that there are no stopwords. +If the "StopWords" section is not specified in the config, then the [default_en](./cpp_src/core/ft/stopwords/stop_en.cc) and [default_ru](./cpp_src/core/ft/stopwords/stop_ru.cc) stopwords lists will be used, and if it is explicitly specified empty, it means that there are no stopwords. 
### Detailed typos config diff --git a/reflect.go b/reflect.go index 10471c80d..fbafa2d52 100644 --- a/reflect.go +++ b/reflect.go @@ -40,9 +40,13 @@ type indexOptions struct { } func parseRxTags(field reflect.StructField) (idxName string, idxType string, expireAfter string, idxSettings []string) { - tagsSlice := strings.SplitN(field.Tag.Get("reindex"), ",", 3) + tag, isSet := field.Tag.Lookup("reindex") + tagsSlice := strings.SplitN(tag, ",", 3) var idxOpts string idxName, idxType, expireAfter, idxOpts = tagsSlice[0], "", "", "" + if isSet && len(idxName) == 0 && !field.Anonymous && field.Name != "_" { + idxName = field.Name + } if len(tagsSlice) > 1 { idxType = tagsSlice[1] @@ -140,14 +144,14 @@ func parseIndexesImpl(indexDefs *[]bindings.IndexDef, st reflect.Type, subArray } if jsonTag == "-" && !opts.isComposite && !opts.isJoined { - if reindexTag := field.Tag.Get("reindex"); reindexTag != "" { - return fmt.Errorf("non-composite/non-joined field ('%s'), marked with `json:-` can not have explicit reindex tags, but it does ('%s')", field.Name, reindexTag) + if reindexTag, isSet := field.Tag.Lookup("reindex"); isSet { + return fmt.Errorf("non-composite/non-joined field ('%s'), marked with `json:-` can not have explicit reindex tags, but it does (reindex:\"%s\")", field.Name, reindexTag) } continue } if !opts.isComposite && !field.IsExported() { - if reindexTag := field.Tag.Get("reindex"); reindexTag != "" { - return fmt.Errorf("unexported non-composite field ('%s') can not have reindex tags, but it does ('%s')", field.Name, reindexTag) + if reindexTag, isSet := field.Tag.Lookup("reindex"); isSet { + return fmt.Errorf("unexported non-composite field ('%s') can not have reindex tags, but it does (reindex:\"%s\")", field.Name, reindexTag) } continue } diff --git a/reindexer.go b/reindexer.go index 90488fcb4..60b982132 100644 --- a/reindexer.go +++ b/reindexer.go @@ -159,6 +159,7 @@ type AggregationResult struct { // NewReindex Create new instanse of Reindexer DB // 
Returns pointer to created instance +// The absolute path for Windows builtin should look like 'builtin://C:/my/folder/db' func NewReindex(dsn interface{}, options ...interface{}) *Reindexer { rx := &Reindexer{ impl: newReindexImpl(dsn, options...), diff --git a/test/dsl_test.go b/test/dsl_test.go index b5719ae91..dc39427c5 100644 --- a/test/dsl_test.go +++ b/test/dsl_test.go @@ -624,7 +624,7 @@ func TestDSLQueries(t *testing.T) { execDSLTwice(t, func(t *testing.T, q *reindexer.Query) { _, err := q.Exec().FetchAll() - require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is suported only by 'sparse' or 'array' indexes", cond[1])) + require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is supported only by 'sparse' or 'array' indexes", cond[1])) }, fmt.Sprintf(jsonDSL, cond[0])) } }) @@ -1298,7 +1298,7 @@ func TestDSLQueries(t *testing.T) { execDSLTwice(t, func(t *testing.T, q *reindexer.Query) { _, err := q.Exec().FetchAll() - require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is suported only by 'sparse' or 'array' indexes", cond[1])) + require.ErrorContains(t, err, fmt.Sprintf("The '%s' condition is supported only by 'sparse' or 'array' indexes", cond[1])) }, fmt.Sprintf(jsonDSL, cond[0])) } }) diff --git a/test/encdec_test.go b/test/encdec_test.go index dbcb0bb63..b00f7438c 100644 --- a/test/encdec_test.go +++ b/test/encdec_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/restream/reindexer/v4" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -46,63 +47,68 @@ type ( type TestItemEncDec struct { ID int `reindex:"id,-"` *TestEmbedItem - Prices []*TestJoinItem `reindex:"prices,,joined"` - Pricesx []*TestJoinItem `reindex:"pricesx,,joined"` - Packages []int `reindex:"packages,hash"` - UPackages []uint `reindex:"upackages,hash"` - UPackages64 []uint64 `reindex:"upackages64,hash"` - FPackages []float32 `reindex:"fpackages,tree"` - FPackages64 []float64 `reindex:"fpackages64,tree"` - Bool bool 
`reindex:"bool"` - Bools []bool `reindex:"bools"` - Name string `reindex:"name,tree"` - Countries []string `reindex:"countries,tree"` - Description string `reindex:"description,fuzzytext"` - Rate float64 `reindex:"rate,tree"` - CustomStringsPtr TestCustomStringsPtrs `reindex:"custom_strings_ptrs"` - CustomStrings TestCustomStrings `reindex:"custom_strings"` - CustomInts64 TestCustomInts64 `reindex:"custom_ints64"` - CustomInts16 TestCustomInts16 `reindex:"custom_ints16"` - CustomFloats TestCustomFloats `reindex:"custom_floats"` - IsDeleted bool `reindex:"isdeleted,-"` - PNested *TestNest `reindex:"-"` - Nested TestNest - NestedA [1]TestNest `reindex:"-"` - NonIndexArr []int - NonIndexA [20]float64 - NonIndexPA []*int - Actors []Actor `reindex:"-"` - PricesIDs []int `reindex:"price_id"` - LocationID string `reindex:"location"` - EndTime uint32 `reindex:"end_time,-"` - StartTime uint64 `reindex:"start_time,tree"` - PStrNull *string - PStr *string - Tmp string `reindex:"tmp,-"` - Map1 map[string]int - Map2 map[int64]Actor - Map3 map[int]*Actor - Map4 map[int]*int - Map5 map[int][]int - Map6 map[uint][]uint - Interface interface{} - Interface2 interface{} - InterfaceNull interface{} - MapNull map[string]int - SliceStrNull []string - SliceNull []int - SliceStr []string - SliceUInt []uint - SliceUInt64 []uint64 - NegativeSliceInt64 []int64 - SliceF64 []float64 - SliceF32 []float32 - SliceBool []bool - SliceIface []interface{} - SliceIface1 []interface{} - UInt64 uint64 - UInt32 uint32 - UInt uint + Prices []*TestJoinItem `reindex:"prices,,joined"` + Pricesx []*TestJoinItem `reindex:"pricesx,,joined"` + Packages []int `reindex:"packages,hash"` + UPackages []uint `reindex:"upackages,hash"` + UPackages64 []uint64 `reindex:"upackages64,hash"` + FPackages []float32 `reindex:"fpackages,tree"` + FPackages64 []float64 `reindex:"fpackages64,tree"` + Bool bool `reindex:"bool"` + Bools []bool `reindex:"bools"` + Name string `reindex:"name,tree"` + Countries []string 
`reindex:"countries,tree"` + Description string `reindex:"description,fuzzytext"` + Rate float64 `reindex:"rate,tree"` + CustomStringsPtr TestCustomStringsPtrs `reindex:"custom_strings_ptrs"` + CustomStrings TestCustomStrings `reindex:"custom_strings"` + CustomInts64 TestCustomInts64 `reindex:"custom_ints64"` + CustomInts16 TestCustomInts16 `reindex:"custom_ints16"` + CustomFloats TestCustomFloats `reindex:"custom_floats"` + IsDeleted bool `reindex:"isdeleted,-"` + EmptyReindexTagStr1 string `reindex:",-"` + EmptyReindexTagStr2 string `reindex:""` + EmptyJsonTagStr string `json:""` + TextLabel1 string `reindex:"TextLabel,text" json:",omitempty"` + TextLabel2 string `reindex:",text" json:",omitempty"` + PNested *TestNest `reindex:"-"` + Nested TestNest + NestedA [1]TestNest `reindex:"-"` + NonIndexArr []int + NonIndexA [20]float64 + NonIndexPA []*int + Actors []Actor `reindex:"-"` + PricesIDs []int `reindex:"price_id"` + LocationID string `reindex:"location"` + EndTime uint32 `reindex:"end_time,-"` + StartTime uint64 `reindex:"start_time,tree"` + PStrNull *string + PStr *string + Tmp string `reindex:"tmp,-"` + Map1 map[string]int + Map2 map[int64]Actor + Map3 map[int]*Actor + Map4 map[int]*int + Map5 map[int][]int + Map6 map[uint][]uint + Interface interface{} + Interface2 interface{} + InterfaceNull interface{} + MapNull map[string]int + SliceStrNull []string + SliceNull []int + SliceStr []string + SliceUInt []uint + SliceUInt64 []uint64 + NegativeSliceInt64 []int64 + SliceF64 []float64 + SliceF32 []float32 + SliceBool []bool + SliceIface []interface{} + SliceIface1 []interface{} + UInt64 uint64 + UInt32 uint32 + UInt uint Custom TestCustomBytes Time time.Time @@ -187,6 +193,11 @@ func FillTestItemsEncDec(start int, count int, pkgsCount int, asJson bool) { Age: rand.Int() % 5, Name: randString(), }, + EmptyReindexTagStr1: randString(), + EmptyReindexTagStr2: randString(), + EmptyJsonTagStr: randString(), + TextLabel1: randString(), + TextLabel2: randString(), 
PNested: &TestNest{ Age: rand.Int() % 5, Name: randString(), @@ -202,24 +213,24 @@ func FillTestItemsEncDec(start int, count int, pkgsCount int, asJson bool) { `xxxx`: int(rand.Int31()), }, Map2: map[int64]Actor{ - 1: Actor{randString()}, - 100: Actor{randString()}, + 1: {randString()}, + 100: {randString()}, }, Map3: map[int]*Actor{ - 4: &Actor{randString()}, - 2: &Actor{randString()}, + 4: {randString()}, + 2: {randString()}, }, Map4: map[int]*int{ 5: vint1, 120: vint2, }, Map5: map[int][]int{ - 0: []int{1, 2, 3}, - -1: []int{9, 8, 7}, + 0: {1, 2, 3}, + -1: {9, 8, 7}, }, Map6: map[uint][]uint{ - 0: []uint{1, 2, 3}, - 4: []uint{9, 8, 7}, + 0: {1, 2, 3}, + 4: {9, 8, 7}, }, NonIndexPA: []*int{ vint1, @@ -293,15 +304,32 @@ func TestHeterogeneusArrayEncDec(t *testing.T) { defer it.Close() require.NoError(t, it.Error()) - items := make([]interface{}, 0, 1) for it.Next() { item := &TestItemEncDec{} err := json.Unmarshal(it.JSON(), &item) require.NoError(t, err, "error json was: %s\n", it.JSON()) - items = append(items, item) } } +func checkIndexesWithEmptyTags(t *testing.T) { + expectedIndexes := map[string]string{ + "EmptyReindexTagStr1": "-", + "EmptyReindexTagStr2": "hash", + "TextLabel": "text", + "TextLabel2": "text", + } + + desc, err := DB.DescribeNamespace("test_items_encdec") + require.NoError(t, err) + for _, index := range desc.Indexes { + if typ, ok := expectedIndexes[index.Name]; ok { + assert.Equal(t, typ, index.IndexType) + delete(expectedIndexes, index.Name) + } + } + assert.Empty(t, expectedIndexes, "Some of the indexes are missing") +} + func TestEncDec(t *testing.T) { t.Parallel() // Fill items by cjson encoder @@ -310,6 +338,8 @@ func TestEncDec(t *testing.T) { // fill items in json format FillTestItemsEncDec(5000, 10000, 20, true) + checkIndexesWithEmptyTags(t) + // get and decode all items by cjson decoder newTestQuery(DB, "test_items_encdec").ExecAndVerify(t) diff --git a/test/index_struct_test.go b/test/index_struct_test.go index 
70bd89be8..e7382568e 100644 --- a/test/index_struct_test.go +++ b/test/index_struct_test.go @@ -120,15 +120,15 @@ func TestOpenNs(t *testing.T) { err := DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailSimple{}) assert.ErrorContains(t, err, - "non-composite/non-joined field ('Age'), marked with `json:-` can not have explicit reindex tags, but it does ('age,hash')") + "non-composite/non-joined field ('Age'), marked with `json:-` can not have explicit reindex tags, but it does (reindex:\"age,hash\")") err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailPrivate{}) assert.ErrorContains(t, err, - "unexported non-composite field ('private') can not have reindex tags, but it does ('private,hash')") + "unexported non-composite field ('private') can not have reindex tags, but it does (reindex:\"private,hash\")") err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailPrivateJoin{}) assert.ErrorContains(t, err, - "unexported non-composite field ('privateAccounts') can not have reindex tags, but it does ('accounts,,joined')") + "unexported non-composite field ('privateAccounts') can not have reindex tags, but it does (reindex:\"accounts,,joined\")") err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), FailJoinScalar{}) assert.ErrorContains(t, err, diff --git a/test/storage_test.go b/test/storage_test.go index aa2da8763..4a1bfe3bc 100644 --- a/test/storage_test.go +++ b/test/storage_test.go @@ -1,8 +1,10 @@ package reindexer import ( + "encoding/json" "fmt" "io/ioutil" + "math/rand" "net/url" "os" "testing" @@ -78,9 +80,29 @@ type TestItemV6 struct { F4 int `reindex:"f4"` } +// Item with regular indexes +type TestIndexesCompatibilityRegularItem struct { + ID int `reindex:"id,,pk"` + StrField string `reindex:"str_field"` + IntField int `reindexe:"int_field,tree"` +} + +const testIndexesCompatibilityRegularNs = "indexes_compat_r_d" + +// Item with dense indexes +type TestIndexesCompatibilityDenseItem struct { + ID int 
`reindex:"id,,pk"` + StrField string `reindex:"str_field,,dense"` + IntField int `reindexe:"int_field,tree,dense"` +} + +const testIndexesCompatibilityDenseNs = "indexes_compat_d_r" + func init() { tnamespaces[TestItemsStorageNs] = TestItemV1{} tnamespaces[TestWalNs] = TestItem{} + tnamespaces[testIndexesCompatibilityRegularNs] = TestIndexesCompatibilityRegularItem{} + tnamespaces[testIndexesCompatibilityDenseNs] = TestIndexesCompatibilityDenseItem{} } func TestStorageChangeFormat(t *testing.T) { @@ -229,3 +251,76 @@ func TestWal(t *testing.T) { require.Equal(t, lastLsn1, lastLsn2) }) } + +func newTestIndexesCompatibilityRegularItem(id int) interface{} { + return &TestIndexesCompatibilityRegularItem{ + ID: 1000000 + id, + StrField: randString(), + IntField: rand.Intn(100000), + } +} + +func newTestIndexesCompatibilityDenseItem(id int) interface{} { + return &TestIndexesCompatibilityDenseItem{ + ID: 1000000 + id, + StrField: randString(), + IntField: rand.Intn(100000), + } +} + +func TestDenseIndexesCompatibility(t *testing.T) { + getJSONContent := func(t *testing.T, ns string) []string { + var ret []string + it := DB.Query(ns).Sort("id", false).MustExec(t) + require.NoError(t, it.Error()) + for it.Next() { + require.NoError(t, it.Error()) + j, err := json.Marshal(it.Object()) + require.NoError(t, err) + ret = append(ret, string(j)) + } + return ret + } + + testImpl := func(t *testing.T, ns string, oldNewItem func(id int) interface{}, newNewItem func(id int) interface{}, newItemType interface{}) { + const inserts = 100 + for i := 0; i < inserts; i++ { + upd, err := DB.Insert(ns, oldNewItem(i)) + require.NoError(t, err) + require.Equal(t, 1, upd) + } + initialJSONs := getJSONContent(t, ns) + + err := DB.CloseNamespace(ns) + require.NoError(t, err) + err = DB.OpenNamespace(ns, reindexer.DefaultNamespaceOptions(), newItemType) + require.NoError(t, err) + reopennedJSONs := getJSONContent(t, ns) + require.Equal(t, initialJSONs, reopennedJSONs) + require.Equal(t, 
len(reopennedJSONs), inserts) + + const offset = inserts / 2 + var newJSONs []string + for i := offset; i < inserts+offset; i++ { + item := newNewItem(i) + j, err := json.Marshal(item) + require.NoError(t, err) + newJSONs = append(newJSONs, string(j)) + err = DB.Upsert(ns, item) + require.NoError(t, err) + } + finalJSONs := getJSONContent(t, ns) + require.Equal(t, initialJSONs[0:offset], finalJSONs[0:offset]) + require.Equal(t, newJSONs, finalJSONs[offset:]) + } + + t.Run("Binding is able to reopen namespace with dense indexes", func(t *testing.T) { + testImpl(t, testIndexesCompatibilityRegularNs, newTestIndexesCompatibilityRegularItem, + newTestIndexesCompatibilityDenseItem, TestIndexesCompatibilityDenseItem{}) + }) + + t.Run("Binding is able to reopen namespace with regular indexes", func(t *testing.T) { + testImpl(t, testIndexesCompatibilityDenseNs, newTestIndexesCompatibilityDenseItem, + newTestIndexesCompatibilityRegularItem, TestIndexesCompatibilityRegularItem{}) + }) +}