diff --git a/src/blockchain_db/berkeleydb/db_bdb.cpp b/src/blockchain_db/berkeleydb/db_bdb.cpp index 221c0cf2e..efe8528cd 100644 --- a/src/blockchain_db/berkeleydb/db_bdb.cpp +++ b/src/blockchain_db/berkeleydb/db_bdb.cpp @@ -1595,7 +1595,7 @@ bool BlockchainBDB::has_key_image(const crypto::key_image& img) const // Ostensibly BerkeleyDB has batch transaction support built-in, // so the following few functions will be NOP. -void BlockchainBDB::batch_start() +void BlockchainBDB::batch_start(uint64_t batch_num_blocks) { LOG_PRINT_L3("BlockchainBDB::" << __func__); } diff --git a/src/blockchain_db/berkeleydb/db_bdb.h b/src/blockchain_db/berkeleydb/db_bdb.h index 83588b031..da57cabb1 100644 --- a/src/blockchain_db/berkeleydb/db_bdb.h +++ b/src/blockchain_db/berkeleydb/db_bdb.h @@ -191,7 +191,7 @@ public: ); virtual void set_batch_transactions(bool batch_transactions); - virtual void batch_start(); + virtual void batch_start(uint64_t batch_num_blocks=0); virtual void batch_commit(); virtual void batch_stop(); virtual void batch_abort(); diff --git a/src/blockchain_db/blockchain_db.h b/src/blockchain_db/blockchain_db.h index 46c860122..c4ca328f6 100644 --- a/src/blockchain_db/blockchain_db.h +++ b/src/blockchain_db/blockchain_db.h @@ -357,7 +357,7 @@ public: // release db lock virtual void unlock() = 0; - virtual void batch_start() = 0; + virtual void batch_start(uint64_t batch_num_blocks=0) = 0; virtual void batch_stop() = 0; virtual void set_batch_transactions(bool) = 0; diff --git a/src/blockchain_db/lmdb/db_lmdb.cpp b/src/blockchain_db/lmdb/db_lmdb.cpp index 0ed044954..0d0b8ba87 100644 --- a/src/blockchain_db/lmdb/db_lmdb.cpp +++ b/src/blockchain_db/lmdb/db_lmdb.cpp @@ -233,7 +233,7 @@ void mdb_txn_safe::allow_new_txns() -void BlockchainLMDB::do_resize() +void BlockchainLMDB::do_resize(uint64_t increase_size) { MDB_envinfo mei; @@ -244,6 +244,11 @@ void BlockchainLMDB::do_resize() mdb_env_stat(m_env, &mst); uint64_t new_mapsize = (double)mei.me_mapsize * RESIZE_FACTOR; 
+ // If given, grow the map by increase_size instead of by RESIZE_FACTOR as above. + // This is currently used for increasing by an estimated size at the start of a new + // batch txn. + if (increase_size > 0) + new_mapsize = mei.me_mapsize + increase_size; new_mapsize += (new_mapsize % mst.ms_psize); @@ -272,7 +277,8 @@ void BlockchainLMDB::do_resize() mdb_txn_safe::allow_new_txns(); } -bool BlockchainLMDB::need_resize() const +// threshold_size: if > 0, a resize is needed when free map space is below it (used for batch transactions); if 0, the percent-based check is used +bool BlockchainLMDB::need_resize(uint64_t threshold_size) const { MDB_envinfo mei; @@ -282,15 +288,107 @@ bool BlockchainLMDB::need_resize() const mdb_env_stat(m_env, &mst); + // size_used doesn't include data yet to be committed, which can be a + // significant size during batch transactions. For that, we estimate the size + // needed at the beginning of the batch transaction and pass in the + // additional size needed. uint64_t size_used = mst.ms_psize * mei.me_last_pgno; + LOG_PRINT_L1("DB map size: " << mei.me_mapsize); + LOG_PRINT_L1("Space used: " << size_used); + LOG_PRINT_L1("Space remaining: " << mei.me_mapsize - size_used); + LOG_PRINT_L1("Size threshold: " << threshold_size); + LOG_PRINT_L1("Percent used: " << (double)size_used/mei.me_mapsize << " Percent threshold: " << RESIZE_PERCENT); + + if (threshold_size > 0) + { + if (mei.me_mapsize - size_used < threshold_size) + { + LOG_PRINT_L1("Threshold met (size-based)"); + return true; + } + else + return false; + } + if ((double)size_used / mei.me_mapsize > RESIZE_PERCENT) { + LOG_PRINT_L1("Threshold met (percent-based)"); return true; } return false; } +void BlockchainLMDB::check_and_resize_for_batch(uint64_t batch_num_blocks) +{ + LOG_PRINT_L1("[batch] checking DB size"); + const uint64_t min_increase_size = 128 * (1 << 20); + uint64_t threshold_size = 0; + uint64_t increase_size = 0; + if (batch_num_blocks > 0) + { + threshold_size = get_estimated_batch_size(batch_num_blocks); + LOG_PRINT_L1("calculated batch size: " << threshold_size); + + // The 
increased DB size could be a multiple of threshold_size, a fixed + // size increase (> threshold_size), or other variations. + // + // Currently we use the greater of threshold size and a minimum size. The + // minimum size increase is used to avoid frequent resizes when the batch + // size is set to a very small number of blocks. + increase_size = (threshold_size > min_increase_size) ? threshold_size : min_increase_size; + LOG_PRINT_L1("increase size: " << increase_size); + } + + // if threshold_size is 0 (i.e. number of blocks for batch not passed in), it + // will fall back to the percent-based threshold check instead of the + // size-based check + if (need_resize(threshold_size)) + { + LOG_PRINT_L0("[batch] DB resize needed"); + do_resize(increase_size); + } +} + +uint64_t BlockchainLMDB::get_estimated_batch_size(uint64_t batch_num_blocks) const +{ + uint64_t threshold_size = 0; + // Estimates the DB space needed for a batch of batch_num_blocks blocks from the average size of the most recent blocks. + // batch size estimate * batch safety factor = final size estimate + // Takes into account "reasonable" block size increases in batch. + float batch_safety_factor = 1.7f; + // estimate of stored block expanded from raw block, including denormalization and db overhead. + // Note that this probably doesn't grow linearly with block size. + float db_expand_factor = 4.5f; + uint64_t num_prev_blocks = 500; + // For resizing purposes, allow for at least 4k average block size. + uint64_t min_block_size = 4 * 1024; + + const uint64_t block_end = m_height; // exclusive bound: no unsigned underflow when the DB is empty (m_height == 0) + uint64_t block_start = 0; + if (block_end > num_prev_blocks) + block_start = block_end - num_prev_blocks; + uint32_t num_blocks_used = 0; + uint64_t total_block_size = 0; + for (uint64_t block_num = block_start; block_num < block_end; ++block_num) + { + uint32_t block_size = get_block_size(block_num); + total_block_size += block_size; + // Track number of blocks being totalled here instead of assuming, in case + // some blocks were to be skipped for being outliers. 
+ ++num_blocks_used; + } + size_t avg_block_size = (num_blocks_used > 0) ? total_block_size / num_blocks_used : 0; // no blocks counted: avoid dividing by zero, min_block_size applies below + LOG_PRINT_L1("average block size across recent " << num_blocks_used << " blocks: " << avg_block_size); + if (avg_block_size < min_block_size) + avg_block_size = min_block_size; + LOG_PRINT_L1("estimated average block size for batch: " << avg_block_size); + + threshold_size = avg_block_size * db_expand_factor * batch_num_blocks; + threshold_size = threshold_size * batch_safety_factor; + return threshold_size; +} + void BlockchainLMDB::add_block( const block& blk , const size_t& block_size , const difficulty_type& cumulative_difficulty @@ -1820,7 +1918,8 @@ bool BlockchainLMDB::has_key_image(const crypto::key_image& img) const return false; } -void BlockchainLMDB::batch_start() +// batch_num_blocks: (optional) Used to check if resize needed before batch transaction starts. +void BlockchainLMDB::batch_start(uint64_t batch_num_blocks) { LOG_PRINT_L3("BlockchainLMDB::" << __func__); if (! m_batch_transactions) @@ -1833,6 +1932,8 @@ void BlockchainLMDB::batch_start() throw0(DB_ERROR("batch transaction attempted, but m_write_txn already in use")); check_open(); + check_and_resize_for_batch(batch_num_blocks); + m_write_batch_txn = new mdb_txn_safe(); // NOTE: need to make sure it's destroyed properly when done @@ -1927,7 +2028,8 @@ uint64_t BlockchainLMDB::add_block( const block& blk if (m_height % 1000 == 0) { - if (need_resize()) + // for batch mode, DB resize check is done at start of batch transaction + if (! 
m_batch_active && need_resize()) { LOG_PRINT_L0("LMDB memory map needs resized, doing that now."); do_resize(); diff --git a/src/blockchain_db/lmdb/db_lmdb.h b/src/blockchain_db/lmdb/db_lmdb.h index 6a2646816..6f2262546 100644 --- a/src/blockchain_db/lmdb/db_lmdb.h +++ b/src/blockchain_db/lmdb/db_lmdb.h @@ -191,7 +191,7 @@ public: ); virtual void set_batch_transactions(bool batch_transactions); - virtual void batch_start(); + virtual void batch_start(uint64_t batch_num_blocks=0); virtual void batch_commit(); virtual void batch_stop(); virtual void batch_abort(); @@ -199,9 +199,11 @@ public: virtual void pop_block(block& blk, std::vector& txs); private: - void do_resize(); + void do_resize(uint64_t size_increase=0); - bool need_resize() const; + bool need_resize(uint64_t threshold_size=0) const; + void check_and_resize_for_batch(uint64_t batch_num_blocks); + uint64_t get_estimated_batch_size(uint64_t batch_num_blocks) const; virtual void add_block( const block& blk , const size_t& block_size diff --git a/src/blockchain_utilities/blockchain_converter.cpp b/src/blockchain_utilities/blockchain_converter.cpp index 855dde644..d18ce8789 100644 --- a/src/blockchain_utilities/blockchain_converter.cpp +++ b/src/blockchain_utilities/blockchain_converter.cpp @@ -236,7 +236,7 @@ int main(int argc, char* argv[]) } if (opt_batch) - blockchain->batch_start(); + blockchain->batch_start(db_batch_size); uint64_t i = 0; for (i = start_block; i < end_block + 1; ++i) { @@ -277,7 +277,7 @@ int main(int argc, char* argv[]) std::cout << "\r \r"; std::cout << "[- batch commit at height " << i + 1 << " -]" << ENDL; blockchain->batch_stop(); - blockchain->batch_start(); + blockchain->batch_start(db_batch_size); std::cout << ENDL; blockchain->show_stats(); } diff --git a/src/blockchain_utilities/blockchain_import.cpp b/src/blockchain_utilities/blockchain_import.cpp index aeed2b335..924b46d2d 100644 --- a/src/blockchain_utilities/blockchain_import.cpp +++ 
b/src/blockchain_utilities/blockchain_import.cpp @@ -254,7 +254,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path, uint6 } if (use_batch) - simple_core.batch_start(); + simple_core.batch_start(db_batch_size); LOG_PRINT_L0("Reading blockchain from bootstrap file..."); std::cout << ENDL; @@ -482,7 +482,7 @@ int import_from_file(FakeCore& simple_core, std::string& import_file_path, uint6 // zero-based height std::cout << ENDL << "[- batch commit at height " << h-1 << " -]" << ENDL; simple_core.batch_stop(); - simple_core.batch_start(); + simple_core.batch_start(db_batch_size); std::cout << ENDL; #if !defined(BLOCKCHAIN_DB) || (BLOCKCHAIN_DB == DB_LMDB) simple_core.m_storage.get_db().show_stats(); diff --git a/src/blockchain_utilities/bootstrap_file.cpp b/src/blockchain_utilities/bootstrap_file.cpp index 70f0b1fec..573cb1572 100644 --- a/src/blockchain_utilities/bootstrap_file.cpp +++ b/src/blockchain_utilities/bootstrap_file.cpp @@ -454,7 +454,7 @@ uint64_t BootstrapFile::count_blocks(const std::string& import_file_path) str1.assign(buf1, sizeof(chunk_size)); if (! 
::serialization::parse_binary(str1, chunk_size)) throw std::runtime_error("Error in deserialization of chunk_size"); - LOG_PRINT_L1("chunk_size: " << chunk_size); + LOG_PRINT_L3("chunk_size: " << chunk_size); if (chunk_size > BUFFER_SIZE) { diff --git a/src/blockchain_utilities/fake_core.h b/src/blockchain_utilities/fake_core.h index 79fb51842..5eda504fe 100644 --- a/src/blockchain_utilities/fake_core.h +++ b/src/blockchain_utilities/fake_core.h @@ -96,9 +96,9 @@ struct fake_core_lmdb return m_storage.get_db().add_block(blk, block_size, cumulative_difficulty, coins_generated, txs); } - void batch_start() + void batch_start(uint64_t batch_num_blocks = 0) { - m_storage.get_db().batch_start(); + m_storage.get_db().batch_start(batch_num_blocks); } void batch_stop() @@ -150,7 +150,7 @@ struct fake_core_memory return 2; } - void batch_start() + void batch_start(uint64_t batch_num_blocks = 0) { LOG_PRINT_L0("WARNING: [batch_start] opt_batch set, but this database doesn't support/need transactions - ignoring"); }