Skip to content

Commit

Permalink
Merge pull request #9098 from EOSIO/resource-monitor-plugin
Browse files Browse the repository at this point in the history
Initial checkin of resource monior plugin
  • Loading branch information
linhuang-blockone authored Jun 9, 2020
2 parents aab3c41 + 1e613c7 commit 8241509
Show file tree
Hide file tree
Showing 24 changed files with 1,417 additions and 4 deletions.
5 changes: 4 additions & 1 deletion libraries/chain/controller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,10 +418,13 @@ struct controller_impl {

emit( self.irreversible_block, *bitr );

// blog.append could fail due to failures like running out of space.
// Do it before commit so that in case it throws, DB can be rolled back.
blog.append( (*bitr)->block );

db.commit( (*bitr)->block_num );
root_id = (*bitr)->id;

blog.append( (*bitr)->block );

auto rbitr = rbi.begin();
while( rbitr != rbi.end() && rbitr->blocknum <= (*bitr)->block_num ) {
Expand Down
1 change: 1 addition & 0 deletions plugins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_subdirectory(history_plugin)
add_subdirectory(history_api_plugin)
add_subdirectory(state_history_plugin)
add_subdirectory(trace_api_plugin)
add_subdirectory(resource_monitor_plugin)

add_subdirectory(wallet_plugin)
add_subdirectory(wallet_api_plugin)
Expand Down
4 changes: 2 additions & 2 deletions plugins/chain_plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ add_library( chain_plugin
chain_plugin.cpp
${HEADERS} )

target_link_libraries( chain_plugin eosio_chain appbase )
target_include_directories( chain_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/../chain_interface/include" "${CMAKE_CURRENT_SOURCE_DIR}/../../libraries/appbase/include")
target_link_libraries( chain_plugin eosio_chain appbase resource_monitor_plugin )
target_include_directories( chain_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/../chain_interface/include" "${CMAKE_CURRENT_SOURCE_DIR}/../../libraries/appbase/include" "${CMAKE_CURRENT_SOURCE_DIR}/../resource_monitor_plugin/include")
7 changes: 7 additions & 0 deletions plugins/chain_plugin/chain_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include <eosio/chain/eosio_contract.hpp>

#include <eosio/resource_monitor_plugin/resource_monitor_plugin.hpp>

#include <chainbase/environment.hpp>

#include <boost/signals2/connection.hpp>
Expand Down Expand Up @@ -735,6 +737,11 @@ void chain_plugin::plugin_initialize(const variables_map& options) {
my->chain_config->state_dir = app().data_dir() / config::default_state_dir_name;
my->chain_config->read_only = my->readonly;

if (auto resmon_plugin = app().find_plugin<resource_monitor_plugin>()) {
resmon_plugin->monitor_directory(my->chain_config->blocks_dir);
resmon_plugin->monitor_directory(my->chain_config->state_dir);
}

if( options.count( "chain-state-db-size-mb" ))
my->chain_config->state_size = options.at( "chain-state-db-size-mb" ).as<uint64_t>() * 1024 * 1024;

Expand Down
5 changes: 5 additions & 0 deletions plugins/producer_plugin/producer_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <eosio/chain/transaction_object.hpp>
#include <eosio/chain/thread_utils.hpp>
#include <eosio/chain/unapplied_transaction_queue.hpp>
#include <eosio/resource_monitor_plugin/resource_monitor_plugin.hpp>

#include <fc/io/json.hpp>
#include <fc/log/logger_config.hpp>
Expand Down Expand Up @@ -897,6 +898,10 @@ void producer_plugin::plugin_initialize(const boost::program_options::variables_

EOS_ASSERT( fc::is_directory(my->_snapshots_dir), snapshot_directory_not_found_exception,
"No such directory '${dir}'", ("dir", my->_snapshots_dir.generic_string()) );

if (auto resmon_plugin = app().find_plugin<resource_monitor_plugin>()) {
resmon_plugin->monitor_directory(my->_snapshots_dir);
}
}

my->_incoming_block_subscription = app().get_channel<incoming::channels::block>().subscribe(
Expand Down
11 changes: 11 additions & 0 deletions plugins/resource_monitor_plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
file(GLOB HEADERS "include/eosio/resource_monitor_plugin/*.hpp")
add_library( resource_monitor_plugin
resource_monitor_plugin.cpp
system_file_space_provider.cpp
resmon_impl.cpp
${HEADERS} )

target_link_libraries( resource_monitor_plugin appbase fc chain_plugin)
target_include_directories( resource_monitor_plugin PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" )

add_subdirectory( test )
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#pragma once

#include <boost/filesystem.hpp>
#include <boost/asio.hpp>

#include <appbase/application.hpp>
#include <eosio/chain/exceptions.hpp>

namespace bfs = boost::filesystem;

namespace eosio::resource_monitor {
template<typename SpaceProvider>
class file_space_handler {
public:
file_space_handler(SpaceProvider&& space_provider, boost::asio::io_context& ctx)
:space_provider(std::move(space_provider)),
timer{ctx}
{
}

void set_sleep_time(uint32_t sleep_time) {
sleep_time_in_secs = sleep_time;
}

// warning_threshold must be less than shutdown_threshold.
// set them together so it is simpler to check.
void set_threshold(uint32_t new_threshold, uint32_t new_warning_threshold) {
EOS_ASSERT(new_warning_threshold < new_threshold, chain::plugin_config_exception,
"warning_threshold ${new_warning_threshold} must be less than threshold ${new_threshold}", ("new_warning_threshold", new_warning_threshold) ("new_threshold", new_threshold));

shutdown_threshold = new_threshold;
warning_threshold = new_warning_threshold;
}

void set_shutdown_on_exceeded(bool new_shutdown_on_exceeded) {
shutdown_on_exceeded = new_shutdown_on_exceeded;
}

bool is_threshold_exceeded() const {
// Go over each monitored file system
for (auto& fs: filesystems) {
boost::system::error_code ec;
auto info = space_provider.get_space(fs.path_name, ec);
if ( ec ) {
// As the system is running and this plugin is not a critical
// part of the system, we should not exit.
// Just report the failure and continue;
wlog( "Unable to get space info for ${path_name}: [code: ${ec}] ${message}. Ignore this failure.",
("path_name", fs.path_name.string())
("ec", ec.value())
("message", ec.message()));

continue;
}

if ( info.available < fs.warning_available ) {
wlog("Space usage on ${path}'s file system approaching threshold. available: ${available}, warning_available: ${warning_available}", ("path", fs.path_name.string()) ("available", info.available) ("warning_available", fs.warning_available));
if ( shutdown_on_exceeded ) {
wlog("nodeos will shutdown when space usage exceeds threshold ${threshold}%", ("threshold", shutdown_threshold));
}

if ( info.available < fs.shutdown_available ) {
wlog("Space usage on ${path}'s file system exceeded threshold ${threshold}%, available: ${available}, Capacity: ${capacity}, shutdown_available: ${shutdown_available}", ("path", fs.path_name.string()) ("threshold", shutdown_threshold) ("available", info.available) ("capacity", info.capacity) ("shutdown_available", fs.shutdown_available));

return true;
}
}
}

return false;
}

void add_file_system(const bfs::path& path_name) {
// Get detailed information of the path
struct stat statbuf;
auto status = space_provider.get_stat(path_name.string().c_str(), &statbuf);
EOS_ASSERT(status == 0, chain::plugin_config_exception,
"Failed to run stat on ${path} with status ${status}", ("path", path_name.string())("status", status));

dlog("${path_name}'s file system to be monitored", ("path_name", path_name.string()));

// If the file system containing the path is already
// in the filesystem list, do not add it again
for (auto& fs: filesystems) {
if (statbuf.st_dev == fs.st_dev) { // Two files belong to the same file system if their device IDs are the same.
dlog("${path_name}'s file system already monitored", ("path_name", path_name.string()));

return;
}
}

// For efficiency, precalculate threshold values to avoid calculating it
// everytime we check space usage. Since bfs::space returns
// available amount, we use minimum available amount as threshold.
boost::system::error_code ec;
auto info = space_provider.get_space(path_name, ec);
EOS_ASSERT(!ec, chain::plugin_config_exception,
"Unable to get space info for ${path_name}: [code: ${ec}] ${message}",
("path_name", path_name.string())
("ec", ec.value())
("message", ec.message()));

auto shutdown_available = (100 - shutdown_threshold) * (info.capacity / 100); // (100 - shutdown_threshold)/100 is the percentage of minimum number of available bytes the file system must maintain
auto warning_available = (100 - warning_threshold) * (info.capacity / 100);

// Add to the list
filesystems.emplace_back(statbuf.st_dev, shutdown_available, path_name, warning_available);

ilog("${path_name}'s file system monitored. shutdown_available: ${shutdown_available}, capacity: ${capacity}, threshold: ${threshold}", ("path_name", path_name.string()) ("shutdown_available", shutdown_available) ("capacity", info.capacity) ("threshold", shutdown_threshold) );
}

void space_monitor_loop() {
if ( is_threshold_exceeded() && shutdown_on_exceeded ) {
wlog("Shutting down");
appbase::app().quit(); // This will gracefully stop Nodeos
return;
}

timer.expires_from_now( boost::posix_time::seconds( sleep_time_in_secs ));
timer.async_wait([this](auto& ec) {
if ( ec ) {
wlog("Exit due to error: ${rc}, message: ${message}",
("ec", ec.value())
("message", ec.message()));
return;
} else {
// Loop over
space_monitor_loop();
}
});
}

private:
SpaceProvider space_provider;

boost::asio::deadline_timer timer;

uint32_t sleep_time_in_secs {2};
uint32_t shutdown_threshold {90};
uint32_t warning_threshold {85};
bool shutdown_on_exceeded {true};

struct filesystem_info {
dev_t st_dev; // device id of file system containing "file_path"
uintmax_t shutdown_available {0}; // minimum number of available bytes the file system must maintain
bfs::path path_name;
uintmax_t warning_available {0}; // warning is issued when availabla number of bytese drops below warning_available

filesystem_info(dev_t dev, uintmax_t available, const bfs::path& path, uintmax_t warning)
: st_dev(dev),
shutdown_available(available),
path_name(path),
warning_available(warning)
{
}
};

// Stores file systems to be monitored. Duplicate
// file systems are not stored.
std::vector<filesystem_info> filesystems;
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once
#include <appbase/application.hpp>
#include <eosio/chain_plugin/chain_plugin.hpp>

#include <eosio/resource_monitor_plugin/file_space_handler.hpp>
#include <eosio/resource_monitor_plugin/system_file_space_provider.hpp>

namespace bfs = boost::filesystem;
using namespace eosio::resource_monitor;

namespace eosio {

using namespace appbase;

class resource_monitor_plugin_impl {
public:
resource_monitor_plugin_impl();

void set_program_options(appbase::options_description&, appbase::options_description& cfg);

void plugin_initialize(const appbase::variables_map& options);

// Start main thread
void plugin_startup();

// System is shutting down.
void plugin_shutdown();

void monitor_directory(const bfs::path& path);

private:
std::thread monitor_thread;
std::vector<bfs::path> directories_registered;

static constexpr uint32_t def_interval_in_secs = 2;
static constexpr uint32_t interval_low = 1;
static constexpr uint32_t interval_high = 300;

static constexpr uint32_t def_space_threshold = 90; // in percentage
static constexpr uint32_t space_threshold_low = 6; // in percentage
static constexpr uint32_t space_threshold_high = 99; // in percentage
static constexpr uint32_t space_threshold_warning_diff = 5; // Warning issued when space used reached (threshold - space_threshold_warning_diff). space_threshold_warning_diff must be smaller than space_threshold_low

boost::asio::io_context ctx;

using file_space_handler_t = file_space_handler<system_file_space_provider>;
file_space_handler_t space_handler;
};

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once
#include <appbase/application.hpp>
#include <eosio/chain_plugin/chain_plugin.hpp>

namespace eosio {

using namespace appbase;

class resource_monitor_plugin : public appbase::plugin<resource_monitor_plugin> {
public:
resource_monitor_plugin( );
virtual ~resource_monitor_plugin();

APPBASE_PLUGIN_REQUIRES( (chain_plugin) )
virtual void set_program_options(options_description&, options_description& cfg) override;

void plugin_initialize(const variables_map& options);
void plugin_startup();
void plugin_shutdown();

// Called by plugins and other components to request
// directory monitoring
void monitor_directory(const bfs::path& path);

private:
std::unique_ptr<class resource_monitor_plugin_impl> my;
};

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include <sys/stat.h>
#include <boost/filesystem.hpp>

namespace bfs = boost::filesystem;

namespace eosio::resource_monitor {
class system_file_space_provider {
public:
system_file_space_provider()
{
}

// Wrapper for Linux stat
int get_stat(const char *path, struct stat *buf) const;

// Wrapper for boost file system space
bfs::space_info get_space(const bfs::path& p, boost::system::error_code& ec) const;
};
}
Loading

0 comments on commit 8241509

Please sign in to comment.