Skip to content

Commit

Permalink
Merge pull request #3059 from graydon/bug-2607-stream-debug-meta-files
Browse files Browse the repository at this point in the history
Bug 2607 stream debug meta files

Reviewed-by: MonsieurNicolas
  • Loading branch information
latobarita authored Jun 19, 2021
2 parents 0c98431 + 3c52357 commit be75bd1
Show file tree
Hide file tree
Showing 19 changed files with 595 additions and 287 deletions.
2 changes: 2 additions & 0 deletions Builds/VisualStudio/stellar-core.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ exit /b 0
<ClCompile Include="..\..\src\invariant\test\OrderBookIsNotCrossedTests.cpp" />
<ClCompile Include="..\..\src\invariant\test\SponsorshipCountIsValidTests.cpp" />
<ClCompile Include="..\..\src\ledger\CheckpointRange.cpp" />
<ClCompile Include="..\..\src\ledger\FlushAndRotateMetaDebugWork.cpp" />
<ClCompile Include="..\..\src\ledger\LedgerHeaderUtils.cpp" />
<ClCompile Include="..\..\src\ledger\LedgerManagerImpl.cpp" />
<ClCompile Include="..\..\src\ledger\LedgerRange.cpp" />
Expand Down Expand Up @@ -780,6 +781,7 @@ exit /b 0
<ClInclude Include="..\..\src\invariant\SponsorshipCountIsValid.h" />
<ClInclude Include="..\..\src\invariant\test\InvariantTestUtils.h" />
<ClInclude Include="..\..\src\ledger\CheckpointRange.h" />
<ClInclude Include="..\..\src\ledger\FlushAndRotateMetaDebugWork.h" />
<ClInclude Include="..\..\src\ledger\LedgerHashUtils.h" />
<ClInclude Include="..\..\src\ledger\LedgerHeaderUtils.h" />
<ClInclude Include="..\..\src\ledger\LedgerManager.h" />
Expand Down
6 changes: 6 additions & 0 deletions Builds/VisualStudio/stellar-core.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,9 @@
<ClCompile Include="..\..\src\ledger\CheckpointRange.cpp">
<Filter>ledger</Filter>
</ClCompile>
<ClCompile Include="..\..\src\ledger\FlushAndRotateMetaDebugWork.cpp">
<Filter>ledger</Filter>
</ClCompile>
<ClCompile Include="..\..\src\ledger\LedgerHeaderUtils.cpp">
<Filter>ledger</Filter>
</ClCompile>
Expand Down Expand Up @@ -1631,6 +1634,9 @@
<ClInclude Include="..\..\src\ledger\CheckpointRange.h">
<Filter>ledger</Filter>
</ClInclude>
<ClInclude Include="..\..\src\ledger\FlushAndRotateMetaDebugWork.h">
<Filter>ledger</Filter>
</ClInclude>
<ClInclude Include="..\..\src\ledger\LedgerHashUtils.h">
<Filter>ledger</Filter>
</ClInclude>
Expand Down
6 changes: 6 additions & 0 deletions docs/stellar-core_example.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,12 @@ METADATA_OUTPUT_STREAM=""
# using --in-memory on the command line).
EXPERIMENTAL_PRECAUTION_DELAY_META=false

# Number of ledgers worth of transaction metadata to preserve on disk for
# debugging purposes. These records are automatically maintained and rotated
# during processing, and are helpful for recovery in case of a serious error;
# they should only be reduced or disabled if disk space is at a premium.
METADATA_DEBUG_LEDGERS=0

# EXCLUDE_TRANSACTIONS_CONTAINING_OPERATION_TYPE (list of strings) default is empty
# Setting this will cause the node to reject transactions that it receives if
# they contain any operation in this list. It will not, however, stop the node
Expand Down
201 changes: 201 additions & 0 deletions src/ledger/FlushAndRotateMetaDebugWork.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// Copyright 2021 Stellar Development Foundation and contributors. Licensed
// under the Apache License, Version 2.0. See the COPYING file at the root
// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0

#include "ledger/FlushAndRotateMetaDebugWork.h"
#include "bucket/BucketManager.h"
#include "crypto/Hex.h"
#include "crypto/Random.h"
#include "util/Fs.h"
#include "util/GlobalChecks.h"
#include <filesystem>
#include <memory>
#include <regex>
#include <stdexcept>
#include <system_error>

namespace
{
const std::string META_DEBUG_DIRNAME{"meta-debug"};
const std::string META_DEBUG_FILE_FMT_STR{"meta-debug-{:08x}-{}.xdr"};
const std::regex META_DEBUG_FILE_REGEX{
"meta-debug-[[:xdigit:]]+-[[:xdigit:]]+\\.xdr(\\.gz)?"};

// This number can be changed in the future without any coordination,
// it just controls the granularity of new meta-debug XDR segments.
//
// 256 ledgers == ~21 minutes. At time of writing, ~5mb meta / minute
// gives ~105mb meta / segment, which should compress to ~20mb.
const uint32_t META_DEBUG_LEDGER_SEGMENT_SIZE = 256;
}

namespace stellar
{

FlushAndRotateMetaDebugWork::FlushAndRotateMetaDebugWork(
Application& app, std::filesystem::path const& metaDebugPath,
std::unique_ptr<XDROutputFileStream> metaDebugFile, uint32_t ledgersToKeep)
: Work(app, "flush and rotate meta-debug", BasicWork::RETRY_NEVER)
, mMetaDebugPath(metaDebugPath)
, mMetaDebugFile(std::move(metaDebugFile))
, mLedgersToKeep(ledgersToKeep)
{
}

std::filesystem::path
FlushAndRotateMetaDebugWork::getMetaDebugDirPath(
std::filesystem::path const& bucketDir)
{
return bucketDir / META_DEBUG_DIRNAME;
}

std::filesystem::path
FlushAndRotateMetaDebugWork::getMetaDebugFilePath(
std::filesystem::path const& bucketDir, uint32_t seqNum)
{
auto file =
fmt::format(META_DEBUG_FILE_FMT_STR, seqNum, binToHex(randomBytes(8)));
return getMetaDebugDirPath(bucketDir) / file;
}

std::vector<std::filesystem::path>
FlushAndRotateMetaDebugWork::listMetaDebugFiles(
std::filesystem::path const& bucketDir)
{
auto dir = getMetaDebugDirPath(bucketDir);
auto files = fs::findfiles(dir.string(), [](std::string const& file) {
return std::regex_match(file, META_DEBUG_FILE_REGEX);
});
std::sort(files.begin(), files.end());
return std::vector<std::filesystem::path>(files.begin(), files.end());
}

bool
FlushAndRotateMetaDebugWork::isDebugSegmentBoundary(uint32_t ledgerSeq)
{
return ledgerSeq % (META_DEBUG_LEDGER_SEGMENT_SIZE - 1) == 0;
}

size_t
FlushAndRotateMetaDebugWork::getNumberOfDebugFilesToKeep(uint32_t numLedgers)
{
size_t segLen = META_DEBUG_LEDGER_SEGMENT_SIZE;
return (numLedgers + segLen - 1) / segLen;
}

BasicWork::State
FlushAndRotateMetaDebugWork::doWork()
{
// Step 1: transfer ownership of mMetaDebugFile to background thread
// and flush/fsync it. When that completes, it will post an action
// back to the main thread that unblocks this work to continue with
// gzip'ing and rotating.
if (mMetaDebugFile)
{
std::weak_ptr<FlushAndRotateMetaDebugWork> weak =
std::static_pointer_cast<FlushAndRotateMetaDebugWork>(
shared_from_this());

// This is a little silly, but we have ownership of a non-copyable
// unique_ptr here, and we want to transfer that ownership into a
// closure held by a std::function -- which unfortunately requires
// its body to be copy-constructable. So we double-indirect through
// something copy-constructble: a shared_ptr<unique_ptr<...>>.
using OwnedStream = std::unique_ptr<XDROutputFileStream>;
auto file = std::make_shared<OwnedStream>(std::move(mMetaDebugFile));
mApp.postOnBackgroundThread(
[weak, file]() {
auto self = weak.lock();
if (!self)
{
return;
}

// First close() here, which will call fsync(), which blocks.
CLOG_DEBUG(Ledger, "closing meta-debug file {}",
self->mMetaDebugPath.string());
try
{
(*file)->close();
}
catch (std::runtime_error& e)
{
// If we fail to close here, we're going to just eat the
// error and carry on to trying to gzip. Hopefully this
// doesn't cause a filehandle leak or something similarly
// horrible.
CLOG_WARNING(Ledger,
"Failed to close debug metadata stream: {}",
e.what());
}

// Then post back to main thread.
self->mApp.postOnMainThread(
[weak]() {
auto self = weak.lock();
if (self)
{
self->wakeUp();
}
},
"wake up gzip and rotate meta-debug");
},
"close and fsync meta-debug");
return BasicWork::State::WORK_WAITING;
}

// Step 2: wait for the creation and completion of mGzipFileWork.
if (!mGzipFileWork)
{
CLOG_DEBUG(Ledger, "compressing meta-debug file {}",
mMetaDebugPath.string());
mGzipFileWork = addWork<GzipFileWork>(mMetaDebugPath.string());
return BasicWork::State::WORK_RUNNING;
}
if (!mGzipFileWork->isDone())
{
return mGzipFileWork->getState();
}
if (mGzipFileWork->getState() == State::WORK_SUCCESS)
{
CLOG_DEBUG(Ledger, "compressed meta-debug file {}",
mMetaDebugPath.string());
}
else
{
CLOG_ERROR(Ledger, "failed to compress meta-debug file {}",
mMetaDebugPath.string());
return State::WORK_FAILURE;
}

// Step 3: synchronously rotate the meta files in the directory, whether
// gzipped or not -- non-gzipped ones are left behind when users kill or
// restart core processes.
auto bucketDir = mApp.getBucketManager().getBucketDir();
auto dir = getMetaDebugDirPath(bucketDir);
auto files = listMetaDebugFiles(bucketDir);
auto keep = getNumberOfDebugFilesToKeep(mLedgersToKeep);
if (files.size() > keep)
{
// Forget about the most-recent (highest-sorting) keep files -- they get
// to survive.
files.resize(files.size() - keep);

// Delete all (size()-keep) younger (lower-sorting) files not forgotten
// above.
for (auto const& file : files)
{
releaseAssert(
std::regex_match(file.string(), META_DEBUG_FILE_REGEX));
auto f = dir / file;
CLOG_DEBUG(Ledger, "trimming old meta-debug file {}", f.string());
std::error_code ec;
std::filesystem::remove(f, ec);
// Ignore errors: there's nothing we can do to "try harder" and
// failing the work is not helpful. We'll just try again.
}
}
return State::WORK_SUCCESS;
}

}
45 changes: 45 additions & 0 deletions src/ledger/FlushAndRotateMetaDebugWork.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2021 Stellar Development Foundation and contributors. Licensed
// under the Apache License, Version 2.0. See the COPYING file at the root
// of this distribution or at http://www.apache.org/licenses/LICENSE-2.0

#pragma once

#include "historywork/GzipFileWork.h"
#include "util/XDRStream.h"
#include <filesystem>

namespace stellar
{

class FlushAndRotateMetaDebugWork : public Work
{
std::filesystem::path mMetaDebugPath;
std::unique_ptr<XDROutputFileStream> mMetaDebugFile;
std::shared_ptr<GzipFileWork> mGzipFileWork;
uint32_t mLedgersToKeep;

public:
FlushAndRotateMetaDebugWork(
Application& app, std::filesystem::path const& metaDebugPath,
std::unique_ptr<XDROutputFileStream> metaDebugFile,
uint32_t ledgersToKeep);
~FlushAndRotateMetaDebugWork() = default;

static std::filesystem::path
getMetaDebugFilePath(std::filesystem::path const& bucketDir,
uint32_t seqNum);

static std::filesystem::path
getMetaDebugDirPath(std::filesystem::path const& bucketDir);

static std::vector<std::filesystem::path>
listMetaDebugFiles(std::filesystem::path const& bucketDir);

static bool isDebugSegmentBoundary(uint32_t ledgerSeq);

static size_t getNumberOfDebugFilesToKeep(uint32_t ledgersToKeep);

protected:
BasicWork::State doWork() override;
};
}
Loading

0 comments on commit be75bd1

Please sign in to comment.