Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

datastore: kvdb::HistoryRangeQuery #2698

Merged
merged 1 commit into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions silkworm/db/datastore/common/caching_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
Copyright 2024 The Silkworm Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once

#include <concepts>
#include <iterator>
#include <optional>
#include <ranges>

namespace silkworm::views {

/**
 * Single-pass view adaptor that remembers the element at the current position.
 * The first dereference of an iterator reads the underlying iterator once and stores the result;
 * further dereferences at the same position return the stored value without touching
 * the underlying range again. Incrementing the iterator discards the stored value.
 *
 * Like views::cache1 in Range-v3
 * https://ericniebler.github.io/range-v3/structranges_1_1views_1_1cache1__fn.html
 * https://stackoverflow.com/questions/67321666/generator-called-twice-in-c20-views-pipeline
 */
template <std::ranges::range TRange>
    requires std::movable<TRange>
class CachingView : public std::ranges::view_interface<CachingView<TRange>> {
  public:
    /**
     * Input iterator pairing the underlying iterator with its sentinel and a one-element cache.
     * It is compared against std::default_sentinel; the comparison delegates to the stored sentinel.
     */
    class Iterator {
      public:
        using RangeIterator = std::ranges::iterator_t<TRange>;
        using RangeSentinel = std::ranges::sentinel_t<TRange>;

        // std::iter_value_t / std::iter_difference_t are used instead of nested typedefs
        // so that iterators without member types (e.g. raw pointers) are supported too
        using value_type = std::iter_value_t<RangeIterator>;
        using iterator_category [[maybe_unused]] = std::input_iterator_tag;
        using difference_type = std::iter_difference_t<RangeIterator>;
        using reference = value_type&;
        using pointer = void;

        Iterator(
            RangeIterator it,
            RangeSentinel sentinel)
            : it_{std::move(it)},
              sentinel_{std::move(sentinel)} {}

        Iterator()
            requires(std::default_initializable<RangeIterator> && std::default_initializable<RangeSentinel>)
        = default;

        //! Returns a reference to the cached element, reading the underlying iterator only on the first call.
        reference operator*() const {
            if (!cached_value_) {
                // emplace only needs value_type to be constructible from *it_ (not assignable)
                cached_value_.emplace(*it_);
            }
            return *cached_value_;
        }

        //! Post-increment: returns a copy of the iterator as it was before advancing (cache included).
        Iterator operator++(int) {
            Iterator previous{*this};
            ++*this;
            return previous;
        }

        //! Pre-increment: advances the underlying iterator and drops the cached element.
        Iterator& operator++() {
            ++it_;
            cached_value_.reset();
            return *this;
        }

        friend bool operator==(const Iterator& it, const std::default_sentinel_t&) {
            return it.it_ == it.sentinel_;
        }
        friend bool operator!=(const Iterator& it, const std::default_sentinel_t&) {
            return it.it_ != it.sentinel_;
        }
        friend bool operator==(const std::default_sentinel_t&, const Iterator& it) {
            return it.sentinel_ == it.it_;
        }
        friend bool operator!=(const std::default_sentinel_t&, const Iterator& it) {
            return it.sentinel_ != it.it_;
        }

      private:
        // value-initialized so that a default-constructed iterator never holds indeterminate values
        RangeIterator it_{};
        RangeSentinel sentinel_{};
        // mutable: filled lazily from the const operator*
        mutable std::optional<value_type> cached_value_;
    };

    static_assert(std::input_iterator<Iterator>);

    //! Takes ownership of the given range by moving it into the view.
    explicit CachingView(TRange&& range)
        : range_{std::move(range)} {}

    CachingView()
        requires std::default_initializable<TRange>
    = default;

    // declaring the move operations makes the view move-only
    CachingView(CachingView&&) = default;
    CachingView& operator=(CachingView&&) noexcept = default;

    //! Creates a fresh iterator (with an empty cache) from the begin/end of the owned range.
    Iterator begin() { return Iterator{std::ranges::begin(range_), std::ranges::end(range_)}; }
    std::default_sentinel_t end() const { return std::default_sentinel; }

  private:
    TRange range_;
};

struct CachingViewFactory {
template <class TRange>
constexpr CachingView<TRange> operator()(TRange&& range) const {
return CachingView<TRange>{std::forward<TRange>(range)};
}

template <class TRange>
friend constexpr CachingView<TRange> operator|(TRange&& range, const CachingViewFactory& caching) {
return caching(std::forward<TRange>(range));
}
};

inline constexpr CachingViewFactory caching;

} // namespace silkworm::views
3 changes: 3 additions & 0 deletions silkworm/db/datastore/kvdb/big_endian_codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ struct BigEndianU64Codec : public Codec {
// Numeric value held by the codec; 0 unless set through the explicit constructor or assigned directly.
uint64_t value{0};
// Byte buffer member.
// NOTE(review): presumably backs the Slice returned by encode() - confirm in the implementation file.
Bytes data;

// Default construction: value stays at its default member initializer (0).
BigEndianU64Codec() = default;
// Constructs the codec with an initial value; explicit, so a uint64_t does not convert implicitly.
explicit BigEndianU64Codec(uint64_t value1) : value{value1} {}
// Overrides the base class destructor.
~BigEndianU64Codec() override = default;

// Codec interface overrides; both are only declared here and defined elsewhere.
Slice encode() override;
void decode(Slice slice) override;
};
Expand Down
1 change: 1 addition & 0 deletions silkworm/db/datastore/kvdb/cursor_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ bool operator==(const CursorIterator& lhs, const CursorIterator& rhs) {
((!lhs.decoders_.first && !lhs.decoders_.second) || (lhs.cursor_ == rhs.cursor_));
}

// Compile-time checks that each cursor iterator type satisfies the std::input_iterator concept.
static_assert(std::input_iterator<CursorMoveIterator>);
static_assert(std::input_iterator<CursorIterator>);
static_assert(std::input_iterator<CursorKVIterator<RawDecoder<Bytes>, RawDecoder<Bytes>>>);
static_assert(std::input_iterator<CursorKeysIterator<RawDecoder<Bytes>>>);
Expand Down
39 changes: 39 additions & 0 deletions silkworm/db/datastore/kvdb/cursor_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,45 @@

namespace silkworm::datastore::kvdb {

class CursorMoveIterator {
public:
using value_type = std::shared_ptr<ROCursor>;
using iterator_category [[maybe_unused]] = std::input_iterator_tag;
using difference_type = std::ptrdiff_t;
using pointer = value_type*;
using reference = value_type&;

CursorMoveIterator() = default;

CursorMoveIterator(
std::shared_ptr<ROCursor> cursor,
MoveOperation move_op)
: cursor_{std::move(cursor)},
move_op_{move_op} {}

const value_type& operator*() const { return cursor_; }
const value_type* operator->() const { return &cursor_; }

CursorMoveIterator operator++(int) { return std::exchange(*this, ++CursorMoveIterator{*this}); }
CursorMoveIterator& operator++() {
if (((move_op_ == MoveOperation::get_current) && cursor_->eof()) || !cursor_->move(move_op_, false)) {
cursor_.reset();
}
return *this;
}

friend bool operator!=(const CursorMoveIterator& it, const std::default_sentinel_t&) {
return !!it.cursor_;
}
friend bool operator==(const CursorMoveIterator& it, const std::default_sentinel_t&) {
return !it.cursor_;
}

private:
std::shared_ptr<ROCursor> cursor_;
MoveOperation move_op_{MoveOperation::next};
};

class CursorIterator {
public:
using value_type = std::pair<std::shared_ptr<Decoder>, std::shared_ptr<Decoder>>;
Expand Down
1 change: 1 addition & 0 deletions silkworm/db/datastore/kvdb/history_queries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
#include "history_delete_query.hpp"
#include "history_get_query.hpp"
#include "history_put_query.hpp"
#include "history_range_query.hpp"
159 changes: 159 additions & 0 deletions silkworm/db/datastore/kvdb/history_range_query.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*
Copyright 2025 The Silkworm Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once

#include <functional>
#include <ranges>
#include <variant>

#include <silkworm/core/common/assert.hpp>

#include "../common/caching_view.hpp"
#include "../common/timestamp.hpp"
#include "cursor_iterator.hpp"
#include "history.hpp"
#include "history_codecs.hpp"
#include "mdbx.hpp"
#include "raw_codec.hpp"

namespace silkworm::datastore::kvdb {

/**
 * Query over the values table of a History entity that lazily produces std::pair<Key, Value> items.
 * The values table is walked with a cursor; for each position handed to the seek function the cursor is
 * positioned with a lower-bound seek at ts_range.start, and the entry found is yielded only if its
 * timestamp is below ts_range.end. Two table layouts are handled, selected by entity.has_large_values.
 * Only ascending order is implemented (asserted in exec_with_eager_begin).
 * NOTE(review): the template parameters are named *Decoder but are constrained with EncoderConcept -
 * confirm that this is the intended concept.
 */
template <EncoderConcept TKeyDecoder, EncoderConcept TValueDecoder>
struct HistoryRangeQuery {
// Transaction used to open the read-only cursors (held by reference).
ROTxn& tx;
// Entity description; this query reads entity.values_table and entity.has_large_values.
History entity;

// Result key/value types, taken from the `value` member of the decoder types.
using Key = decltype(TKeyDecoder::value);
using Value = decltype(TValueDecoder::value);

// Predicate returning the boolean conversion of v; used below to filter out null cursors.
template <typename T>
static constexpr bool as_bool_predicate(const T& v) {
return !!v;
}

// Decodes the entry at the cursor's current position into a (key, value) pair.
// The cursor must be non-null (asserted). The decoded key and value are moved out of the decoder results.
static std::pair<Key, Value> kv_pair_from_cursor(std::shared_ptr<ROCursor> cursor, bool has_large_values) {
SILKWORM_ASSERT(cursor);
// The iterator is only used here for a single dereference; it is never incremented.
CursorIterator any_it{
std::move(cursor),
MoveOperation::next,
std::make_shared<HistoryKeyDecoder<TKeyDecoder>>(has_large_values),
std::make_shared<HistoryValueDecoder<TValueDecoder>>(has_large_values),
};
CursorKVIterator<HistoryKeyDecoder<TKeyDecoder>, HistoryValueDecoder<TValueDecoder>> it{std::move(any_it)};

auto kv_pair = *it;
Key& key = kv_pair.first.key.value;
Value& value = kv_pair.second.value.value;
return std::pair{std::move(key), std::move(value)};
}

// Returns a callable that binds has_large_values and forwards to kv_pair_from_cursor.
static auto kv_pair_from_cursor_func(bool has_large_values) {
return [=](std::shared_ptr<ROCursor> cursor) -> std::pair<Key, Value> {
return kv_pair_from_cursor(std::move(cursor), has_large_values);
};
}

// Builds the result view. The cursor is opened and moved to the first entry immediately (hence "eager begin");
// the rest of the pipeline is evaluated lazily while the returned view is iterated.
// If the table has no first entry, begin_it stays default-constructed and seek_func stays empty.
// NOTE(review): the trailing `false` passed to the cursor calls is presumably a "throw if not found" flag -
// confirm against the ROCursor API.
auto exec_with_eager_begin(TimestampRange ts_range, bool ascending) {
SILKWORM_ASSERT(ascending); // descending is not implemented

CursorMoveIterator begin_it;
// Maps a cursor position to either a cursor positioned at a result entry, or null when there is no result.
std::function<std::shared_ptr<ROCursor>(std::shared_ptr<ROCursor>)> seek_func;

if (entity.has_large_values) {
// Layout where the timestamp is part of the table key (it is decoded from result.key below).
auto begin_cursor = tx.ro_cursor(entity.values_table);
if (begin_cursor->to_first(false)) {
// get_current: the iterator itself does not advance the cursor; seek_func repositions it on each call.
begin_it = CursorMoveIterator{std::move(begin_cursor), MoveOperation::get_current};

// skip_current_key is shared mutable state (value-initialized to false) that survives copies of the lambda;
// it records whether the key at the current position was already handled by the previous call.
seek_func = [ts_range, has_large_values = entity.has_large_values, skip_current_key = std::make_shared<bool>()](std::shared_ptr<ROCursor> cursor) -> std::shared_ptr<ROCursor> {
auto result = cursor->current();
SILKWORM_ASSERT(result);

HistoryKeyDecoder<RawDecoder<ByteView>> key_decoder{has_large_values};
key_decoder.decode(result.key);

if (*skip_current_key) {
// Copy the key bytes before moving the cursor, then advance until an entry with a different key is found.
Bytes current_key{key_decoder.value.key.value};
do {
result = cursor->to_next(false);
// No more entries: nothing to yield.
if (!result) return {};
key_decoder.decode(result.key);
} while (key_decoder.value.key.value == current_key);
}

// Seek to the lower bound of (current key, ts_range.start).
HistoryKeyEncoder<RawEncoder<ByteView>> seek_key_encoder{has_large_values};
seek_key_encoder.value.key.value = key_decoder.value.key.value;
seek_key_encoder.value.timestamp.value = ts_range.start;
Slice seek_key = seek_key_encoder.encode();

result = cursor->lower_bound(seek_key, false);
if (result) {
key_decoder.decode(result.key);
// if we jumped over to the next key, ts_range.start might be invalid
if (key_decoder.value.timestamp.value < ts_range.start) {
// Do not skip: the next call seeks again starting from the key the cursor is on now.
*skip_current_key = false;
return {};
} else if (key_decoder.value.timestamp.value < ts_range.end) {
// Entry within [start, end): yield it and skip the rest of this key on the next call.
*skip_current_key = true;
return cursor;
}
// Timestamp at or beyond ts_range.end: no result for this key; skip it on the next call.
*skip_current_key = true;
return {};
}
// The seek found nothing.
return {};
};
}
} else {
// Layout read through a dup-sort cursor; the timestamp is decoded from result.value below.
auto begin_cursor = tx.ro_cursor_dup_sort(entity.values_table);
if (begin_cursor->to_first(false)) {
// The base cursor is advanced with multi_nextkey_firstvalue by the iterator.
begin_it = CursorMoveIterator{std::move(begin_cursor), MoveOperation::multi_nextkey_firstvalue};

seek_func = [ts_range, has_large_values = entity.has_large_values](std::shared_ptr<ROCursor> base_cursor) -> std::shared_ptr<ROCursor> {
// Seek on a clone so that the base cursor position used for key iteration is left untouched.
auto cursor = base_cursor->clone();
auto result = cursor->current();
SILKWORM_ASSERT(result);

TimestampEncoder ts_range_start_encoder{ts_range.start};
result = dynamic_cast<ROCursorDupSort&>(*cursor).lower_bound_multivalue(result.key, ts_range_start_encoder.encode(), false);
if (result) {
HistoryValueDecoder<RawDecoder<ByteView>> value_decoder{has_large_values};
value_decoder.decode(result.value);
// Yield the cloned cursor only when the found timestamp is below ts_range.end.
if (value_decoder.value.timestamp.value < ts_range.end) {
return std::shared_ptr<ROCursor>{std::move(cursor)};
}
}
return {};
};
}
}

// Pipeline: cursor positions -> seek (may produce null) -> cache the seek result so that it is computed once
// per position -> drop nulls -> decode into (key, value) pairs.
return std::ranges::subrange{std::move(begin_it), std::default_sentinel} |
std::views::transform(std::move(seek_func)) |
silkworm::views::caching |
std::views::filter(as_bool_predicate<std::shared_ptr<ROCursor>>) |
std::views::transform(kv_pair_from_cursor_func(entity.has_large_values));
}

// Lazy entry point: nothing is executed until iteration of the returned view begins.
// The lambda captures a copy of this query object (its tx member remains a reference).
auto exec(TimestampRange ts_range, bool ascending) {
auto exec_func = [query = *this, ts_range, ascending](std::monostate) mutable {
return query.exec_with_eager_begin(ts_range, ascending);
};
// turn into a lazy view that runs exec_func only when iteration is started using range::begin()
return std::views::single(std::monostate{}) | std::views::transform(std::move(exec_func)) | std::views::join;
}
};

} // namespace silkworm::datastore::kvdb
Loading
Loading