monero/CMakeLists.txt

646 lines
23 KiB
CMake
Raw Normal View History

2015-12-30 23:39:56 -07:00
# Copyright (c) 2014-2016, The Monero Project
#
2014-09-11 00:25:07 -06:00
# All rights reserved.
#
2014-09-11 00:25:07 -06:00
# Redistribution and use in source and binary forms, with or without modification, are
# permitted provided that the following conditions are met:
#
2014-09-11 00:25:07 -06:00
# 1. Redistributions of source code must retain the above copyright notice, this list of
# conditions and the following disclaimer.
#
2014-09-11 00:25:07 -06:00
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
# of conditions and the following disclaimer in the documentation and/or other
# materials provided with the distribution.
#
2014-09-11 00:25:07 -06:00
# 3. Neither the name of the copyright holder nor the names of its contributors may be
# used to endorse or promote products derived from this software without specific
# prior written permission.
#
2014-09-11 00:25:07 -06:00
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
2014-09-11 00:25:07 -06:00
# Parts of this file are originally copyright (c) 2012-2013 The Cryptonote developers
cmake_minimum_required(VERSION 2.8.7)
2014-03-03 15:07:58 -07:00
2016-09-03 13:38:20 -06:00
project(monero)
2014-10-23 12:03:54 -06:00
function (die msg)
if (NOT WIN32)
string(ASCII 27 Esc)
set(ColourReset "${Esc}[m")
set(BoldRed "${Esc}[1;31m")
else ()
set(ColourReset "")
set(BoldRed "")
endif ()
message(FATAL_ERROR "${BoldRed}${msg}${ColourReset}")
endfunction ()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
message(STATUS "Setting default build type: ${CMAKE_BUILD_TYPE}")
endif()
string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER)
# ARCH defines the target architecture, either by an explicit identifier or
# one of the following two keywords. By default, ARCH a value of 'native':
# target arch = host arch, binary is not portable. When ARCH is set to the
# string 'default', no -march arg is passed, which creates a binary that is
# portable across processors in the same family as host processor. In cases
# when ARCH is not set to an explicit identifier, cmake's builtin is used
# to identify the target architecture, to direct logic in this cmake script.
# Since ARCH is a cached variable, it will not be set on first cmake invocation.
if (NOT ARCH OR ARCH STREQUAL "" OR ARCH STREQUAL "native" OR ARCH STREQUAL "default")
set(ARCH_ID "${CMAKE_SYSTEM_PROCESSOR}")
else()
set(ARCH_ID "${ARCH}")
endif()
string(TOLOWER "${ARCH_ID}" ARM_ID)
string(SUBSTRING "${ARM_ID}" 0 3 ARM_TEST)
if (ARM_TEST STREQUAL "arm")
set(ARM 1)
string(SUBSTRING "${ARM_ID}" 0 5 ARM_TEST)
if (ARM_TEST STREQUAL "armv6")
set(ARM6 1)
endif()
if (ARM_TEST STREQUAL "armv7")
set(ARM7 1)
endif()
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
endif()
if (ARM_ID STREQUAL "aarch64")
set(ARM 1)
set(ARM8 1)
endif()
if(WIN32 OR ARM)
set(OPT_FLAGS_RELEASE "-O2")
else()
set(OPT_FLAGS_RELEASE "-Ofast")
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
endif()
set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG ${OPT_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG ${OPT_FLAGS_RELEASE}")
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
# set this to 0 if per-block checkpoint needs to be disabled
set(PER_BLOCK_CHECKPOINT 1)
if(PER_BLOCK_CHECKPOINT)
add_definitions("-DPER_BLOCK_CHECKPOINT")
endif()
2014-10-22 13:17:52 -06:00
list(INSERT CMAKE_MODULE_PATH 0
"${CMAKE_SOURCE_DIR}/cmake")
if (NOT DEFINED ENV{DEVELOPER_LOCAL_TOOLS})
message(STATUS "Could not find DEVELOPER_LOCAL_TOOLS in env (not required)")
set(BOOST_IGNORE_SYSTEM_PATHS_DEFAULT OFF)
elseif ("$ENV{DEVELOPER_LOCAL_TOOLS}" EQUAL 1)
message(STATUS "Found: env DEVELOPER_LOCAL_TOOLS = 1")
set(BOOST_IGNORE_SYSTEM_PATHS_DEFAULT ON)
else()
message(STATUS "Found: env DEVELOPER_LOCAL_TOOLS = 0")
set(BOOST_IGNORE_SYSTEM_PATHS_DEFAULT OFF)
endif()
message(STATUS "BOOST_IGNORE_SYSTEM_PATHS defaults to ${BOOST_IGNORE_SYSTEM_PATHS_DEFAULT}")
option(BOOST_IGNORE_SYSTEM_PATHS "Ignore boost system paths for local boost installation" ${BOOST_IGNORE_SYSTEM_PATHS_DEFAULT})
if (NOT DEFINED ENV{DEVELOPER_LIBUNBOUND_OLD})
message(STATUS "Could not find DEVELOPER_LIBUNBOUND_OLD in env (not required)")
elseif ("$ENV{DEVELOPER_LIBUNBOUND_OLD}" EQUAL 1)
message(STATUS "Found: env DEVELOPER_LIBUNBOUND_OLD = 1, will use the work around")
add_definitions(-DDEVELOPER_LIBUNBOUND_OLD)
elseif ("$ENV{DEVELOPER_LIBUNBOUND_OLD}" EQUAL 0)
message(STATUS "Found: env DEVELOPER_LIBUNBOUND_OLD = 0")
else()
message(STATUS "Found: env DEVELOPER_LIBUNBOUND_OLD with bad value. Will NOT use the work around")
endif()
2014-03-03 15:07:58 -07:00
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
enable_testing()
option(BUILD_DOCUMENTATION "Build the Doxygen documentation." ON)
# Check whether we're on a 32-bit or 64-bit system
if(CMAKE_SIZEOF_VOID_P EQUAL "8")
2015-03-02 09:04:58 -07:00
set(DEFAULT_BUILD_64 ON)
else()
2015-03-02 09:04:58 -07:00
set(DEFAULT_BUILD_64 OFF)
endif()
2015-03-03 12:57:40 -07:00
option(BUILD_64 "Build for 64-bit? 'OFF' builds for 32-bit." ${DEFAULT_BUILD_64})
2015-03-02 09:04:58 -07:00
if(BUILD_64)
set(ARCH_WIDTH "64")
else()
set(ARCH_WIDTH "32")
endif()
message(STATUS "Building for a ${ARCH_WIDTH}-bit system")
# Check if we're on FreeBSD so we can exclude the local miniupnpc (it should be installed from ports instead)
# CMAKE_SYSTEM_NAME checks are commonly known, but specifically taken from libsdl's CMakeLists
if(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD.*")
set(FREEBSD TRUE)
elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly.*|FreeBSD")
set(FREEBSD TRUE)
endif()
# Check if we're on OpenBSD. See the README.md for build instructions.
2016-01-21 11:18:26 -07:00
if(CMAKE_SYSTEM_NAME MATCHES "kOpenBSD.*|OpenBSD.*")
set(OPENBSD TRUE)
endif()
# TODO: check bsdi, NetBSD, to see if they need the same FreeBSD changes
#
# elseif(CMAKE_SYSTEM_NAME MATCHES "kNetBSD.*|NetBSD.*")
# set(NETBSD TRUE)
# elseif(CMAKE_SYSTEM_NAME MATCHES ".*BSDI.*")
# set(BSDI TRUE)
2014-04-09 06:14:35 -06:00
include_directories(src contrib/epee/include external "${CMAKE_BINARY_DIR}/version")
2014-03-03 15:07:58 -07:00
if(APPLE)
2014-04-30 14:50:06 -06:00
include_directories(SYSTEM /usr/include/malloc)
endif()
if(MSVC OR MINGW)
set(DEFAULT_STATIC true)
else()
set(DEFAULT_STATIC false)
endif()
2014-10-21 12:20:26 -06:00
option(STATIC "Link libraries statically" ${DEFAULT_STATIC})
2014-03-03 15:07:58 -07:00
# This is a CMake built-in switch that concerns internal libraries
if (NOT DEFINED BUILD_SHARED_LIBS AND NOT STATIC AND CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
set(BUILD_SHARED_LIBS ON CACHE STRING "Build internal libs as shared")
endif()
if (BUILD_SHARED_LIBS)
message(STATUS "Building internal libraries as shared")
set(PIC_FLAG "-fPIC")
else()
message(STATUS "Building internal libraries as static")
endif()
if(MINGW)
string(REGEX MATCH "^[^/]:/[^/]*" msys2_install_path "${CMAKE_C_COMPILER}")
message(STATUS "MSYS location: ${msys2_install_path}")
set(CMAKE_INCLUDE_PATH "${msys2_install_path}/mingw${ARCH_WIDTH}/include")
# This is necessary because otherwise CMake will make Boost libraries -lfoo
# rather than a full path. Unfortunately, this makes the shared libraries get
# linked due to a bug in CMake which misses putting -static flags around the
# -lfoo arguments.
set(DEFLIB ${msys2_install_path}/mingw${ARCH_WIDTH}/lib)
list(REMOVE_ITEM CMAKE_C_IMPLICIT_LINK_DIRECTORIES ${DEFLIB})
list(REMOVE_ITEM CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES ${DEFLIB})
endif()
if(STATIC)
if(MSVC)
set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .dll.a .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
endif()
endif()
# default database:
# should be lmdb for testing, memory for production still
# set(DATABASE memory)
set(DATABASE lmdb)
if (DEFINED ENV{DATABASE})
set(DATABASE $ENV{DATABASE})
message(STATUS "DATABASE set: ${DATABASE}")
else()
message(STATUS "Could not find DATABASE in env (not required unless you want to change database type from default: ${DATABASE})")
endif()
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
set(BERKELEY_DB_OVERRIDE 0)
if (DEFINED ENV{BERKELEY_DB})
set(BERKELEY_DB_OVERRIDE 1)
set(BERKELEY_DB $ENV{BERKELEY_DB})
elseif()
set(BERKELEY_DB 0)
endif()
if (DATABASE STREQUAL "lmdb")
message(STATUS "Using LMDB as default DB type")
set(BLOCKCHAIN_DB DB_LMDB)
add_definitions("-DDEFAULT_DB_TYPE=\"lmdb\"")
elseif (DATABASE STREQUAL "berkeleydb")
find_package(BerkeleyDB)
if(NOT BERKELEY_DB)
die("Found BerkeleyDB includes, but could not find BerkeleyDB library. Please make sure you have installed libdb and libdb-dev / libdb++-dev or the equivalent.")
else()
message(STATUS "Found BerkeleyDB include (db.h) in ${BERKELEY_DB_INCLUDE_DIR}")
if(BERKELEY_DB_LIBRARIES)
message(STATUS "Found BerkeleyDB shared library")
set(BDB_STATIC false CACHE BOOL "BDB Static flag")
set(BDB_INCLUDE ${BERKELEY_DB_INCLUDE_DIR} CACHE STRING "BDB include path")
set(BDB_LIBRARY ${BERKELEY_DB_LIBRARIES} CACHE STRING "BDB library name")
set(BDB_LIBRARY_DIRS "" CACHE STRING "BDB Library dirs")
set(BERKELEY_DB 1)
else()
die("Found BerkeleyDB includes, but could not find BerkeleyDB library. Please make sure you have installed libdb and libdb-dev / libdb++-dev or the equivalent.")
endif()
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
endif()
message(STATUS "Using Berkeley DB as default DB type")
add_definitions("-DDEFAULT_DB_TYPE=\"berkeley\"")
else()
die("Invalid database type: ${DATABASE}")
endif()
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
if(BERKELEY_DB)
add_definitions("-DBERKELEY_DB")
endif()
add_definitions("-DBLOCKCHAIN_DB=${BLOCKCHAIN_DB}")
find_package(Libunwind)
# Can't install hook in static build on OSX, because OSX linker does not support --wrap
cmake: ARM: exclude libunwind in static build Else error in build with STATIC=ON: cd /home/redfish/bitmonero/build/release/src/miner && /usr/bin/cmake -E cmake_link_script CMakeFiles/simpleminer.dir/link.txt --verbose=1 /usr/bin/c++ -std=c++11 -D_GNU_SOURCE -Wall -Wextra -Wpointer-arith -Wundef -Wvla -Wwrite-strings -Wno-error=extra -Wno-error=deprecated-declarations -Wno-unused-parameter -Wno-unused-variable -Wno-error=unused-variable -Wno-error=undef -Wno-error=uninitialized -Wlogical-op -Wno-error=maybe-uninitialized -Wno-reorder -Wno-missing-field-initializers -march=armv7-a -fno-strict-aliasing -mfloat-abi=hard -DNDEBUG -O2 -flto -ffat-lto-objects -static-libgcc -static-libstdc++ -Wl,--wrap=__cxa_throw CMakeFiles/simpleminer.dir/simpleminer.cpp.o -o ../../bin/simpleminer -rdynamic -Wl,-Bstatic -lrt -Wl,-Bdynamic -ldl ../cryptonote_core/libcryptonote_core.a ../common/libcommon.a -Wl,-Bstatic -lboost_filesystem -lboost_program_options -lboost_regex -lboost_chrono -lboost_system -lboost_thread -Wl,-Bdynamic -pthread -Wl,-Bstatic -lrt -Wl,-Bdynamic -ldl ../blockchain_db/libblockchain_db.a ../cryptonote_core/libcryptonote_core.a ../blockchain_db/libblockchain_db.a ../../contrib/otshell_utils/libotshell_utils.a ../blocks/libblocks.a ../common/libcommon.a ../../external/unbound/libunbound.a -lssl -lcrypto -lunwind -Wl,-Bstatic -lboost_program_options ../crypto/libcrypto.a -lboost_date_time -lboost_serialization -lboost_filesystem ../../external/db_drivers/liblmdb/liblmdb.a -Wl,-Bdynamic -pthread -Wl,-Bstatic -lboost_chrono -lboost_system -lboost_thread -lrt -Wl,-Bdynamic -ldl /usr/bin/ld: ../../bin/simpleminer: hidden symbol `__aeabi_unwind_cpp_pr0' in /usr/lib/gcc/armv7l-unknown-linux-gnueabihf/6.1.1/libgcc_eh.a(unwind-arm.o) is referenced by DSO /usr/bin/ld: final link failed: Bad value collect2: error: ld returned 1 exit status
2016-08-28 05:12:36 -06:00
# On ARM, having libunwind package (with .so's only) installed breaks static link.
if(LIBUNWIND_FOUND AND NOT (STATIC AND (APPLE OR ARM)))
set(DEFAULT_STACK_TRACE ON)
else()
set(DEFAULT_STACK_TRACE OFF)
set(LIBUNWIND_LIBRARIES "")
endif()
option(STACK_TRACE "Install a hook that dumps stack on exception" ${DEFAULT_STACK_TRACE})
if(STACK_TRACE)
message(STATUS "Stack trace on exception enabled")
else()
message(STATUS "Stack trace on exception disabled")
endif()
2014-06-11 11:15:23 -06:00
if (UNIX AND NOT APPLE)
# Note that at the time of this writing the -Wstrict-prototypes flag added below will make this fail
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
2014-06-11 11:15:23 -06:00
endif()
add_subdirectory(external)
# Final setup for miniupnpc
if(UPNP_STATIC)
add_definitions("-DUPNP_STATIC")
else()
add_definitions("-DUPNP_DYNAMIC")
include_directories(${UPNP_INCLUDE})
endif()
# Final setup for libunbound
2014-09-24 12:38:24 -06:00
include_directories(${UNBOUND_INCLUDE})
2014-10-21 16:39:15 -06:00
link_directories(${UNBOUND_LIBRARY_DIRS})
# Final setup for rapidjson
include_directories(external/rapidjson)
# Final setup for liblmdb
include_directories(${LMDB_INCLUDE})
# Final setup for Berkeley DB
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
if (BERKELEY_DB)
include_directories(${BDB_INCLUDE})
endif()
# Final setup for libunwind
include_directories(${LIBUNWIND_INCLUDE})
link_directories(${LIBUNWIND_LIBRARY_DIRS})
2014-03-03 15:07:58 -07:00
if(MSVC)
add_definitions("/bigobj /MP /W3 /GS- /D_CRT_SECURE_NO_WARNINGS /wd4996 /wd4345 /D_WIN32_WINNT=0x0600 /DWIN32_LEAN_AND_MEAN /DGTEST_HAS_TR1_TUPLE=0 /FIinline_c.h /D__SSE4_1__")
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Dinline=__inline")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:10485760")
2014-03-03 15:07:58 -07:00
if(STATIC)
foreach(VAR CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELEASE)
string(REPLACE "/MD" "/MT" ${VAR} "${${VAR}}")
endforeach()
endif()
include_directories(SYSTEM src/platform/msc)
else()
include(TestCXXAcceptsFlag)
set(ARCH native CACHE STRING "CPU to build for: -march value or 'default' to not pass -march at all")
message(STATUS "Building on ${CMAKE_SYSTEM_PROCESSOR} for ${ARCH}")
if(ARCH STREQUAL "default")
2014-03-03 15:07:58 -07:00
set(ARCH_FLAG "")
else()
set(ARCH_FLAG "-march=${ARCH}")
2014-03-03 15:07:58 -07:00
endif()
set(WARNINGS "-Wall -Wextra -Wpointer-arith -Wundef -Wvla -Wwrite-strings -Wno-error=extra -Wno-error=deprecated-declarations -Wno-unused-parameter -Wno-unused-variable -Wno-error=unused-variable -Wno-error=undef -Wno-error=uninitialized")
if(NOT MINGW)
cmake: do not pass -Werror when building tests The tests currently issue a warning that "warning: -fassociative-math disabled; other options take precedence" The associative math optimization is turned on indirectly by -Ofast. Apparently, the optimization is forced to be disabled, while compiling test harnesses generated by Google Test framework. Unfortunately, there is no -Wno-error=* flag to disable this warning (see gcc --help=warnings). An alternative to this patch is to disable the optimization explicitly with -fno-associative-math, but that seems worse. Another alternative is to not pass -Ofast for tests build, but we want the tests to be built with exact same optimization flags as the code being tested, otherwise the value of the tests is diminished. Another alternative is to remove -Werror from the entire build, but it's good to include that flag to preclude people leaving warnings. A note regarding implementation of not passing -Werror for tests: I considered filtering out -Werror from CMAKE_{C,CXX}_FLAGS but that seems to be worse because it's surprizing behavior, to those reading the code that adds -Werror. It is better to add it for when it is used and not added otherwise. I also considered relying on order, adding -Werror after inluding 'tests' subdir, but before including the other subdirs, but that also seems cryptic to the reader. So, I settled with the current solution, of explicitly setting CMAKE_{C,CXX}_FLAGS to different values before including the respective subdir. Testing done: compared compiler invocation for non-tests source files using `make VERBOSE=1` with and without this commit: the only difference is the position of -Werror. So, this commit doesn't change the binary.
2016-07-04 19:21:02 -06:00
set(WARNINGS_AS_ERRORS_FLAG "-Werror")
endif()
2014-03-03 15:07:58 -07:00
if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
2016-07-20 06:24:50 -06:00
set(WARNINGS "${WARNINGS} -Wno-deprecated-register -Wno-error=mismatched-tags -Wno-error=null-conversion -Wno-overloaded-shift-op-parentheses -Wno-error=shift-count-overflow -Wno-error=tautological-constant-out-of-range-compare -Wno-error=unused-private-field -Wno-error=unneeded-internal-declaration")
if(ARM)
set(WARNINGS "${WARNINGS} -Wno-error=inline-asm")
endif()
2014-03-03 15:07:58 -07:00
else()
set(WARNINGS "${WARNINGS} -Wlogical-op -Wno-error=maybe-uninitialized")
endif()
if(MINGW)
set(WARNINGS "${WARNINGS} -Wno-error=unused-value -Wno-error=unused-but-set-variable")
set(MINGW_FLAG "${MINGW_FLAG} -DWIN32_LEAN_AND_MEAN")
2014-08-06 10:43:01 -06:00
set(Boost_THREADAPI win32)
2014-03-03 15:07:58 -07:00
include_directories(SYSTEM src/platform/mingw)
# mingw doesn't support LTO (multiple definition errors at link time)
set(USE_LTO_DEFAULT false)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--stack,10485760")
if(NOT BUILD_64)
add_definitions(-DWINVER=0x0501 -D_WIN32_WINNT=0x0501)
endif()
2014-03-03 15:07:58 -07:00
endif()
set(C_WARNINGS "-Waggregate-return -Wnested-externs -Wold-style-definition -Wstrict-prototypes")
set(CXX_WARNINGS "-Wno-reorder -Wno-missing-field-initializers")
2015-05-31 07:39:56 -06:00
try_compile(STATIC_ASSERT_RES "${CMAKE_CURRENT_BINARY_DIR}/static-assert" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-static-assert.c" COMPILE_DEFINITIONS "-std=c11")
2014-03-03 15:07:58 -07:00
if(STATIC_ASSERT_RES)
set(STATIC_ASSERT_FLAG "")
else()
set(STATIC_ASSERT_FLAG "-Dstatic_assert=_Static_assert")
endif()
2015-01-26 14:19:53 -07:00
option(COVERAGE "Enable profiling for test coverage report" 0)
if(COVERAGE)
message(STATUS "Building with profiling for test coverage report")
set(COVERAGE_FLAGS "-fprofile-arcs -ftest-coverage --coverage")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -D_GNU_SOURCE ${MINGW_FLAG} ${STATIC_ASSERT_FLAG} ${WARNINGS} ${C_WARNINGS} ${ARCH_FLAG} ${COVERAGE_FLAGS} ${PIC_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_GNU_SOURCE ${MINGW_FLAG} ${WARNINGS} ${CXX_WARNINGS} ${ARCH_FLAG} ${COVERAGE_FLAGS} ${PIC_FLAG}")
# With GCC 6.1.1 the compiled binary malfunctions due to aliasing. Until that
# is fixed in the code (Issue #847), force compiler to be conservative.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
2015-01-26 14:19:53 -07:00
option(NO_AES "Explicitly disable AES support" ${NO_AES})
if(NOT NO_AES AND NOT ARM)
message(STATUS "AES support enabled")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
elseif(ARM6)
message(STATUS "AES support not available on ARMv6")
elseif(ARM7)
message(STATUS "AES support not available on ARMv7")
elseif(ARM8)
CHECK_CXX_ACCEPTS_FLAG("-march=${ARCH}+crypto" ARCH_PLUS_CRYPTO)
if(ARCH_PLUS_CRYPTO)
message(STATUS "Crypto extensions enabled for ARMv8")
set(ARCH_FLAG "-march=${ARCH}+crypto")
else()
message(STATUS "Crypto extensions unavailable on your ARMv8 device")
endif()
2015-01-26 14:19:53 -07:00
else()
message(STATUS "AES support disabled")
2015-01-26 14:19:53 -07:00
endif()
2015-04-06 06:00:09 -06:00
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -D_GNU_SOURCE ${MINGW_FLAG} ${STATIC_ASSERT_FLAG} ${WARNINGS} ${C_WARNINGS} ${ARCH_FLAG} ${COVERAGE_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_GNU_SOURCE ${MINGW_FLAG} ${WARNINGS} ${CXX_WARNINGS} ${ARCH_FLAG} ${COVERAGE_FLAGS}")
# With GCC 6.1.1 the compiled binary malfunctions due to aliasing. Until that
# is fixed in the code (Issue #847), force compiler to be conservative.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
if(ARM)
message(STATUS "Setting FPU Flags for ARM Processors")
#NB NEON hardware does not fully implement the IEEE 754 standard for floating-point arithmetic
#Need custom assembly code to take full advantage of NEON SIMD
#Cortex-A5/9 -mfpu=neon-fp16
#Cortex-A7/15 -mfpu=neon-vfpv4
#Cortex-A8 -mfpu=neon
#ARMv8 -FP and SIMD on by default for all ARM8v-A series, NO -mfpu setting needed
#For custom -mtune, processor IDs for ARMv8-A series:
#0xd04 - Cortex-A35
#0xd07 - Cortex-A57
#0xd08 - Cortex-A72
#0xd03 - Cortex-A73
if(NOT ARM8)
CHECK_CXX_ACCEPTS_FLAG(-mfpu=vfp3-d16 CXX_ACCEPTS_VFP3_D16)
CHECK_CXX_ACCEPTS_FLAG(-mfpu=vfp4 CXX_ACCEPTS_VFP4)
CHECK_CXX_ACCEPTS_FLAG(-mfloat-abi=hard CXX_ACCEPTS_MFLOAT_HARD)
CHECK_CXX_ACCEPTS_FLAG(-mfloat-abi=softfp CXX_ACCEPTS_MFLOAT_SOFTFP)
endif()
2015-04-06 06:00:09 -06:00
if(ARM8)
CHECK_CXX_ACCEPTS_FLAG(-mfix-cortex-a53-835769 CXX_ACCEPTS_MFIX_CORTEX_A53_835769)
CHECK_CXX_ACCEPTS_FLAG(-mfix-cortex-a53-843419 CXX_ACCEPTS_MFIX_CORTEX_A53_843419)
endif()
if(ARM6)
message(STATUS "Selecting VFP for ARMv6")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfp")
endif(ARM6)
if(ARM7)
if(CXX_ACCEPTS_VFP3_D16 AND NOT CXX_ACCEPTS_VFP4)
message(STATUS "Selecting VFP3 for ARMv7")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp3-d16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfp3-d16")
endif()
if(CXX_ACCEPTS_VFP4)
message(STATUS "Selecting VFP4 for ARMv7")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp4")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfp4")
endif()
if(CXX_ACCEPTS_MFLOAT_HARD)
message(STATUS "Setting Hardware ABI for Floating Point")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=hard")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard")
endif()
if(CXX_ACCEPTS_MFLOAT_SOFTFP AND NOT CXX_ACCEPTS_MFLOAT_HARD)
message(STATUS "Setting Software ABI for Floating Point")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=softfp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp")
endif()
endif(ARM7)
if(ARM8)
if(CXX_ACCEPTS_MFIX_CORTEX_A53_835769)
message(STATUS "Enabling Cortex-A53 workaround 835769")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfix-cortex-a53-835769")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfix-cortex-a53-835769")
endif()
if(CXX_ACCEPTS_MFIX_CORTEX_A53_843419)
message(STATUS "Enabling Cortex-A53 workaround 843419")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfix-cortex-a53-843419")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfix-cortex-a53-843419")
endif()
endif(ARM8)
endif(ARM)
** CHANGES ARE EXPERIMENTAL (FOR TESTING ONLY) Bockchain: 1. Optim: Multi-thread long-hash computation when encountering groups of blocks. 2. Optim: Cache verified txs and return result from cache instead of re-checking whenever possible. 3. Optim: Preload output-keys when encoutering groups of blocks. Sort by amount and global-index before bulk querying database and multi-thread when possible. 4. Optim: Disable double spend check on block verification, double spend is already detected when trying to add blocks. 5. Optim: Multi-thread signature computation whenever possible. 6. Patch: Disable locking (recursive mutex) on called functions from check_tx_inputs which causes slowdowns (only seems to happen on ubuntu/VMs??? Reason: TBD) 7. Optim: Removed looped full-tx hash computation when retrieving transactions from pool (???). 8. Optim: Cache difficulty/timestamps (735 blocks) for next-difficulty calculations so that only 2 db reads per new block is needed when a new block arrives (instead of 1470 reads). Berkeley-DB: 1. Fix: 32-bit data errors causing wrong output global indices and failure to send blocks to peers (etc). 2. Fix: Unable to pop blocks on reorganize due to transaction errors. 3. Patch: Large number of transaction aborts when running multi-threaded bulk queries. 4. Patch: Insufficient locks error when running full sync. 5. Patch: Incorrect db stats when returning from an immediate exit from "pop block" operation. 6. Optim: Add bulk queries to get output global indices. 7. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 8. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 9. Optim: Added thread-safe buffers used when multi-threading bulk queries. 10. Optim: Added support for nosync/write_nosync options for improved performance (*see --db-sync-mode option for details) 11. Mod: Added checkpoint thread and auto-remove-logs option. 12. *Now usable on 32-bit systems like RPI2. LMDB: 1. Optim: Added custom comparison for 256-bit key tables (minor speed-up, TBD: get actual effect) 2. Optim: Modified output_keys table to store public_key+unlock_time+height for single transaction lookup (vs 3) 3. Optim: Used output_keys table retrieve public_keys instead of going through output_amounts->output_txs+output_indices->txs->output:public_key 4. Optim: Added support for sync/writemap options for improved performance (*see --db-sync-mode option for details) 5. Mod: Auto resize to +1GB instead of multiplier x1.5 ETC: 1. Minor optimizations for slow-hash for ARM (RPI2). Incomplete. 2. Fix: 32-bit saturation bug when computing next difficulty on large blocks. [PENDING ISSUES] 1. Berkely db has a very slow "pop-block" operation. This is very noticeable on the RPI2 as it sometimes takes > 10 MINUTES to pop a block during reorganization. This does not happen very often however, most reorgs seem to take a few seconds but it possibly depends on the number of outputs present. TBD. 2. Berkeley db, possible bug "unable to allocate memory". TBD. [NEW OPTIONS] (*Currently all enabled for testing purposes) 1. --fast-block-sync arg=[0:1] (default: 1) a. 0 = Compute long hash per block (may take a while depending on CPU) b. 1 = Skip long-hash and verify blocks based on embedded known good block hashes (faster, minimal CPU dependence) 2. --db-sync-mode arg=[[safe|fast|fastest]:[sync|async]:[nblocks_per_sync]] (default: fastest:async:1000) a. safe = fdatasync/fsync (or equivalent) per stored block. Very slow, but safest option to protect against power-out/crash conditions. b. fast/fastest = Enables asynchronous fdatasync/fsync (or equivalent). Useful for battery operated devices or STABLE systems with UPS and/or systems with battery backed write cache/solid state cache. Fast - Write meta-data but defer data flush. Fastest - Defer meta-data and data flush. Sync - Flush data after nblocks_per_sync and wait. Async - Flush data after nblocks_per_sync but do not wait for the operation to finish. 3. --prep-blocks-threads arg=[n] (default: 4 or system max threads, whichever is lower) Max number of threads to use when computing long-hash in groups. 4. --show-time-stats arg=[0:1] (default: 1) Show benchmark related time stats. 5. --db-auto-remove-logs arg=[0:1] (default: 1) For berkeley-db only. Auto remove logs if enabled. **Note: lmdb and berkeley-db have changes to the tables and are not compatible with official git head version. At the moment, you need a full resync to use this optimized version. [PERFORMANCE COMPARISON] **Some figures are approximations only. Using a baseline machine of an i7-2600K+SSD+(with full pow computation): 1. The optimized lmdb/blockhain core can process blocks up to 585K for ~1.25 hours + download time, so it usually takes 2.5 hours to sync the full chain. 2. The current head with memory can process blocks up to 585K for ~4.2 hours + download time, so it usually takes 5.5 hours to sync the full chain. 3. The current head with lmdb can process blocks up to 585K for ~32 hours + download time and usually takes 36 hours to sync the full chain. Averate procesing times (with full pow computation): lmdb-optimized: 1. tx_ave = 2.5 ms / tx 2. block_ave = 5.87 ms / block memory-official-repo: 1. tx_ave = 8.85 ms / tx 2. block_ave = 19.68 ms / block lmdb-official-repo (0f4a036437fd41a5498ee5e74e2422ea6177aa3e) 1. tx_ave = 47.8 ms / tx 2. block_ave = 64.2 ms / block **Note: The following data denotes processing times only (does not include p2p download time) lmdb-optimized processing times (with full pow computation): 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.25 hours processing time (--db-sync-mode=fastest:async:1000). 2. Laptop, Dual-core / 4-threads U4200 (3Mb) - 4.90 hours processing time (--db-sync-mode=fastest:async:1000). 3. Embedded, Quad-core / 4-threads Z3735F (2x1Mb) - 12.0 hours processing time (--db-sync-mode=fastest:async:1000). lmdb-optimized processing times (with per-block-checkpoint) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 10 minutes processing time (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with full pow computation) 1. Desktop, Quad-core / 8-threads 2600k (8Mb) - 1.8 hours processing time (--db-sync-mode=fastest:async:1000). 2. RPI2. Improved from estimated 3 months(???) into 2.5 days (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000). berkeley-db optimized processing times (with per-block-checkpoint) 1. RPI2. 12-15 hours (*Need 2AMP supply + Clock:1Ghz + [usb+ssd] to achieve this speed) (--db-sync-mode=fastest:async:1000).
2015-07-10 14:09:32 -06:00
2014-04-30 14:50:06 -06:00
if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_TR1_TUPLE=0")
2014-04-30 14:50:06 -06:00
endif()
set(DEBUG_FLAGS "-g3")
2014-03-03 15:07:58 -07:00
if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.8))
set(DEBUG_FLAGS "${DEBUG_FLAGS} -Og ")
2014-03-03 15:07:58 -07:00
else()
set(DEBUG_FLAGS "${DEBUG_FLAGS} -O0 ")
2014-03-03 15:07:58 -07:00
endif()
if(NOT DEFINED USE_LTO_DEFAULT)
set(USE_LTO_DEFAULT true)
endif()
set(USE_LTO ${USE_LTO_DEFAULT} CACHE BOOL "Use Link-Time Optimization (Release mode only)")
2014-09-15 14:47:26 -06:00
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
2015-05-26 03:07:58 -06:00
# There is a clang bug that does not allow to compile code that uses AES-NI intrinsics if -flto is enabled, so explicitly disable
set(USE_LTO false)
2014-09-15 14:47:26 -06:00
endif()
2015-05-26 03:07:58 -06:00
if(USE_LTO)
2014-05-22 05:00:48 -06:00
set(RELEASE_FLAGS "${RELEASE_FLAGS} -flto")
if(STATIC)
set(RELEASE_FLAGS "${RELEASE_FLAGS} -ffat-lto-objects")
endif()
# Since gcc 4.9 the LTO format is non-standard (slim), so we need the gcc-specific ar and ranlib binaries
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0) AND NOT OPENBSD)
# When invoking cmake on distributions on which gcc's binaries are prefixed
# with an arch-specific triplet, the user must specify -DCHOST=<prefix>
if (DEFINED CHOST)
set(CMAKE_AR "${CHOST}-gcc-ar")
set(CMAKE_RANLIB "${CHOST}-gcc-ranlib")
else()
set(CMAKE_AR "gcc-ar")
set(CMAKE_RANLIB "gcc-ranlib")
endif()
endif()
2014-05-22 05:00:48 -06:00
endif()
2014-03-03 15:07:58 -07:00
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${DEBUG_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${DEBUG_FLAGS}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${RELEASE_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${RELEASE_FLAGS}")
if(STATIC)
# STATIC already configures most deps to be linked in statically,
# here we make more deps static if the platform permits it
if (MINGW)
# On Windows, this is as close to fully-static as we get:
# this leaves only deps on /c/Windows/system32/*.dll
set(STATIC_FLAGS "-static")
elseif (NOT (APPLE OR FREEBSD OR OPENBSD))
# On Linux, we don't support fully static build, but these can be static
set(STATIC_FLAGS "-static-libgcc -static-libstdc++")
endif()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${STATIC_FLAGS} ")
endif()
2014-03-03 15:07:58 -07:00
endif()
if (${BOOST_IGNORE_SYSTEM_PATHS} STREQUAL "ON")
set(Boost_NO_SYSTEM_PATHS TRUE)
2014-09-22 04:30:53 -06:00
endif()
set(OLD_LIB_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
2014-03-03 15:07:58 -07:00
if(STATIC)
if(MINGW)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
endif()
2014-03-03 15:07:58 -07:00
set(Boost_USE_STATIC_LIBS ON)
set(Boost_USE_STATIC_RUNTIME ON)
endif()
find_package(Boost 1.58 QUIET REQUIRED COMPONENTS system filesystem thread date_time chrono regex serialization program_options)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${OLD_LIB_SUFFIXES})
if(NOT Boost_FOUND)
die("Could not find Boost libraries, please make sure you have installed Boost or libboost-all-dev (1.58) or the equivalent")
2014-03-03 15:07:58 -07:00
endif()
2014-03-03 15:07:58 -07:00
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
if(MINGW)
set(EXTRA_LIBRARIES mswsock;ws2_32;iphlpapi)
2016-01-21 11:18:26 -07:00
elseif(APPLE OR FREEBSD OR OPENBSD)
set(EXTRA_LIBRARIES "")
2014-04-30 14:50:06 -06:00
elseif(NOT MSVC)
find_library(RT rt)
set(EXTRA_LIBRARIES ${RT})
2014-03-03 15:07:58 -07:00
endif()
list(APPEND EXTRA_LIBRARIES ${CMAKE_DL_LIBS})
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND ARCH_WIDTH EQUAL "32")
find_library(ATOMIC atomic)
list(APPEND EXTRA_LIBRARIES ${ATOMIC})
endif()
include(version.cmake)
2014-03-03 15:07:58 -07:00
function (treat_warnings_as_errors dirs)
foreach(dir ${ARGV})
set_property(DIRECTORY ${dir}
APPEND PROPERTY COMPILE_FLAGS "-Werror")
endforeach()
endfunction()
cmake: do not pass -Werror when building tests The tests currently issue a warning that "warning: -fassociative-math disabled; other options take precedence" The associative math optimization is turned on indirectly by -Ofast. Apparently, the optimization is forced to be disabled, while compiling test harnesses generated by Google Test framework. Unfortunately, there is no -Wno-error=* flag to disable this warning (see gcc --help=warnings). An alternative to this patch is to disable the optimization explicitly with -fno-associative-math, but that seems worse. Another alternative is to not pass -Ofast for tests build, but we want the tests to be built with exact same optimization flags as the code being tested, otherwise the value of the tests is diminished. Another alternative is to remove -Werror from the entire build, but it's good to include that flag to preclude people leaving warnings. A note regarding implementation of not passing -Werror for tests: I considered filtering out -Werror from CMAKE_{C,CXX}_FLAGS but that seems to be worse because it's surprizing behavior, to those reading the code that adds -Werror. It is better to add it for when it is used and not added otherwise. I also considered relying on order, adding -Werror after inluding 'tests' subdir, but before including the other subdirs, but that also seems cryptic to the reader. So, I settled with the current solution, of explicitly setting CMAKE_{C,CXX}_FLAGS to different values before including the respective subdir. Testing done: compared compiler invocation for non-tests source files using `make VERBOSE=1` with and without this commit: the only difference is the position of -Werror. So, this commit doesn't change the binary.
2016-07-04 19:21:02 -06:00
add_subdirectory(contrib)
2014-03-03 15:07:58 -07:00
add_subdirectory(src)
treat_warnings_as_errors(contrib src)
option(BUILD_TESTS "Build tests." OFF)
if(BUILD_TESTS)
add_subdirectory(tests)
2015-01-02 09:52:46 -07:00
endif()
if(BUILD_DOCUMENTATION)
set(DOC_GRAPHS "YES" CACHE STRING "Create dependency graphs (needs graphviz)")
set(DOC_FULLGRAPHS "NO" CACHE STRING "Create call/callee graphs (large)")
find_program(DOT_PATH dot)
if (DOT_PATH STREQUAL "DOT_PATH-NOTFOUND")
message("Doxygen: graphviz not found - graphs disabled")
set(DOC_GRAPHS "NO")
endif()
find_package(Doxygen)
if(DOXYGEN_FOUND)
configure_file("cmake/Doxyfile.in" "Doxyfile" @ONLY)
configure_file("cmake/Doxygen.extra.css.in" "Doxygen.extra.css" @ONLY)
add_custom_target(doc
${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating API documentation with Doxygen.." VERBATIM)
endif()
endif()
# when ON - will install libwallet_merged into "lib"
option(BUILD_GUI_DEPS "Build GUI dependencies." OFF)
# This is not nice, distribution packagers should not enable this, but depend
# on libunbound shipped with their distribution instead
option(INSTALL_VENDORED_LIBUNBOUND "Install libunbound binary built from source vendored with this repo." OFF)