Merge pull request #5174

0de14396 tests: add a CNv4 JIT test (moneromooo-monero)
24d281c3 crypto: plug CNv4 JIT into cn_slow_hash (moneromooo-monero)
78ab59ea crypto: clear cache after generating random program (moneromooo-monero)
b9a61884 performance_tests: add tests for new Cryptonight variants (moneromooo-monero)
fff23bf7 CNv4 JIT compiler for x86-64 and tests (SChernykh)
3dde67d8 blockchain: add v10 fork heights (moneromooo-monero)
This commit is contained in:
Riccardo Spagni 2019-03-04 17:06:37 +02:00
commit 815c8a48ab
No known key found for this signature in database
GPG Key ID: 55432DF31CCD4FCD
12 changed files with 3000 additions and 13 deletions

View File

@ -44,6 +44,8 @@ message(STATUS "CMake version ${CMAKE_VERSION}")
project(monero) project(monero)
enable_language(C ASM)
function (die msg) function (die msg)
if (NOT WIN32) if (NOT WIN32)
string(ASCII 27 Esc) string(ASCII 27 Esc)

View File

@ -45,6 +45,8 @@ set(crypto_sources
random.c random.c
skein.c skein.c
slow-hash.c slow-hash.c
CryptonightR_JIT.c
CryptonightR_template.S
tree-hash.c) tree-hash.c)
set(crypto_headers) set(crypto_headers)
@ -66,7 +68,9 @@ set(crypto_private_headers
oaes_lib.h oaes_lib.h
random.h random.h
skein.h skein.h
skein_port.h) skein_port.h
CryptonightR_JIT.h
CryptonightR_template.h)
monero_private_headers(cncrypto monero_private_headers(cncrypto
${crypto_private_headers}) ${crypto_private_headers})
@ -101,4 +105,5 @@ if (ANDROID OR IOS)
endif() endif()
endif() endif()
# cheat because cmake and ccache hate each other
set_property(SOURCE CryptonightR_template.S PROPERTY LANGUAGE C)

View File

@ -0,0 +1,102 @@
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include "int-util.h"
#include "hash-ops.h"
#include "variant4_random_math.h"
#include "CryptonightR_JIT.h"
#include "CryptonightR_template.h"
// Machine-code prologue copied to the start of every generated routine.
// Going by the per-instruction disassembly below, it keeps the incoming
// argument (rdi) in r10, pushes rbx/rbp/r15, parks the stack pointer in r11,
// and then loads nine 32-bit values from [r10] .. [r10+32] into
// ebx, esi, edi, ebp, esp, r15d, eax, edx and r9d, in that order.
// esp is overwritten with data here, which is why rsp is saved in r11 first.
static const uint8_t prologue[] = {
0x4C, 0x8B, 0xD7, // mov r10, rdi
0x53, // push rbx
0x55, // push rbp
0x41, 0x57, // push r15
0x4C, 0x8B, 0xDC, // mov r11, rsp
0x41, 0x8B, 0x1A, // mov ebx, DWORD PTR [r10]
0x41, 0x8B, 0x72, 0x04, // mov esi, DWORD PTR [r10+4]
0x41, 0x8B, 0x7A, 0x08, // mov edi, DWORD PTR [r10+8]
0x41, 0x8B, 0x6A, 0x0C, // mov ebp, DWORD PTR [r10+12]
0x41, 0x8B, 0x62, 0x10, // mov esp, DWORD PTR [r10+16]
0x45, 0x8B, 0x7A, 0x14, // mov r15d, DWORD PTR [r10+20]
0x41, 0x8B, 0x42, 0x18, // mov eax, DWORD PTR [r10+24]
0x41, 0x8B, 0x52, 0x1C, // mov edx, DWORD PTR [r10+28]
0x45, 0x8B, 0x4A, 0x20, // mov r9d, DWORD PTR [r10+32]
};
// Machine-code epilogue copied to the end of every generated routine.
// It restores rsp from r11, stores ebx/esi/edi/ebp back to the first four
// 32-bit slots at [r10] .. [r10+12], pops r15/rbp/rbx (the reverse of the
// prologue's push order) and returns. The values the prologue loaded from
// [r10+16] onwards are not written back.
static const uint8_t epilogue[] = {
0x49, 0x8B, 0xE3, // mov rsp, r11
0x41, 0x89, 0x1A, // mov DWORD PTR [r10], ebx
0x41, 0x89, 0x72, 0x04, // mov DWORD PTR [r10+4], esi
0x41, 0x89, 0x7A, 0x08, // mov DWORD PTR [r10+8], edi
0x41, 0x89, 0x6A, 0x0C, // mov DWORD PTR [r10+12], ebp
0x41, 0x5F, // pop r15
0x5D, // pop rbp
0x5B, // pop rbx
0xC3, // ret
};
#define APPEND_CODE(src, size) \
do { \
if (JIT_code + (size) > JIT_code_end) \
return -1; \
memcpy(JIT_code, (src), (size)); \
JIT_code += (size); \
} while (0)
// Translates a CryptonightR random-math program into x86-64 machine code.
//
// code     - instruction sequence; emission stops at the first RET opcode
// buf      - destination for the generated code
// buf_size - capacity of buf in bytes
//
// The output is: prologue, one pre-assembled snippet per instruction (taken
// from the `instructions` / `instructions_mov` tables), epilogue.
//
// Returns 0 on success, or -1 if code or buf is NULL or the generated code
// does not fit in buf_size bytes.
int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_func buf, const size_t buf_size)
{
	if (code == NULL || buf == NULL)
		return -1;

	uint8_t* JIT_code = (uint8_t*) buf;
	const uint8_t* JIT_code_end = JIT_code + buf_size;

	APPEND_CODE(prologue, sizeof(prologue));

	// Source index of the last rotate for which an `instructions_mov` snippet
	// was emitted; 0xFFFFFFFF means "none".
	uint32_t prev_rot_src = 0xFFFFFFFFU;

	for (int i = 0;; ++i)
	{
		const struct V4_Instruction inst = code[i];
		if (inst.opcode == RET)
			break;

		// Every opcode except MUL is shifted by 2 to form the table index.
		// NOTE(review): presumably the template reserves extra MUL slots - confirm.
		const uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);

		const uint32_t a = inst.dst_index;
		const uint32_t b = inst.src_index;

		// Table index: opcode | dst | src packed into one byte. A source index
		// of 8 is replaced by the destination index.
		const uint8_t c = opcode | (inst.dst_index << V4_OPCODE_BITS) | (((inst.src_index == 8) ? inst.dst_index : inst.src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));

		switch (inst.opcode)
		{
		case ROR:
		case ROL:
			// Rotates get an extra snippet from instructions_mov, skipped when
			// the previous rotate already used the same source.
			// NOTE(review): presumably this loads the rotate count - confirm.
			if (b != prev_rot_src)
			{
				prev_rot_src = b;

				const uint8_t* mov_begin = (const uint8_t*) instructions_mov[c];
				const uint8_t* mov_end = (const uint8_t*) instructions_mov[c + 1];

				APPEND_CODE(mov_begin, mov_end - mov_begin);
			}
			break;
		}

		// Writing to the remembered source register invalidates it.
		if (a == prev_rot_src)
			prev_rot_src = 0xFFFFFFFFU;

		// A snippet spans from its own table entry to the next one.
		const uint8_t* snippet_begin = (const uint8_t*) instructions[c];
		const uint8_t* snippet_end = (const uint8_t*) instructions[c + 1];

		APPEND_CODE(snippet_begin, snippet_end - snippet_begin);

		if (inst.opcode == ADD)
		{
			// Overwrite the last four bytes of the ADD snippet with the
			// instruction's constant. memcpy is used because the location is
			// not guaranteed to be 4-byte aligned.
			const uint32_t imm = inst.C;
			memcpy(JIT_code - sizeof(imm), &imm, sizeof(imm));
		}
	}

	APPEND_CODE(epilogue, sizeof(epilogue));

	__builtin___clear_cache((char*)buf, (char*)JIT_code);

	return 0;
}

View File

@ -0,0 +1,18 @@
#ifndef CRYPTONIGHTR_JIT_H
#define CRYPTONIGHTR_JIT_H
// Minimalistic JIT code generator for random math sequence in CryptonightR
//
// Usage:
// - Allocate writable and executable memory
// - Call v4_generate_JIT_code with "buf" pointed to memory allocated on previous step
// - Call the generated code instead of "v4_random_math(code, r)", omit the "code" parameter
typedef void (*v4_random_math_JIT_func)(uint32_t* r) __attribute__((sysv_abi));
// Given the random math sequence, generates machine code (x86-64) for it
// Returns 0 if code was generated successfully
// Returns -1 if provided buffer was too small
int v4_generate_JIT_code(const struct V4_Instruction* code, v4_random_math_JIT_func buf, const size_t buf_size);
#endif // CRYPTONIGHTR_JIT_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,10 @@
#include "oaes_lib.h" #include "oaes_lib.h"
#include "variant2_int_sqrt.h" #include "variant2_int_sqrt.h"
#include "variant4_random_math.h" #include "variant4_random_math.h"
#include "CryptonightR_JIT.h"
#include <errno.h>
#include <string.h>
#define MEMORY (1 << 21) // 2MB scratchpad #define MEMORY (1 << 21) // 2MB scratchpad
#define ITER (1 << 20) #define ITER (1 << 20)
@ -51,6 +55,16 @@
extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
/* Writes msg and a newline to stderr, then terminates the process:
 * abort() when NDEBUG is not defined, _exit(1) when it is.
 * Does not return. */
static void local_abort(const char *msg)
{
  fprintf(stderr, "%s\n", msg);
#ifndef NDEBUG
  abort();
#else
  _exit(1);
#endif
}
#define VARIANT1_1(p) \ #define VARIANT1_1(p) \
do if (variant == 1) \ do if (variant == 1) \
{ \ { \
@ -253,11 +267,18 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
#define VARIANT4_RANDOM_MATH_INIT() \ #define VARIANT4_RANDOM_MATH_INIT() \
v4_reg r[9]; \ v4_reg r[9]; \
struct V4_Instruction code[NUM_INSTRUCTIONS_MAX + 1]; \ struct V4_Instruction code[NUM_INSTRUCTIONS_MAX + 1]; \
int jit = use_v4_jit(); \
do if (variant >= 4) \ do if (variant >= 4) \
{ \ { \
for (int i = 0; i < 4; ++i) \ for (int i = 0; i < 4; ++i) \
V4_REG_LOAD(r + i, (uint8_t*)(state.hs.w + 12) + sizeof(v4_reg) * i); \ V4_REG_LOAD(r + i, (uint8_t*)(state.hs.w + 12) + sizeof(v4_reg) * i); \
v4_random_math_init(code, height); \ v4_random_math_init(code, height); \
if (jit) \
{ \
int ret = v4_generate_JIT_code(code, hp_jitfunc, 4096); \
if (ret < 0) \
local_abort("Error generating CryptonightR code"); \
} \
} while (0) } while (0)
#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \ #define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
@ -279,6 +300,9 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
V4_REG_LOAD(r + 7, _b1); \ V4_REG_LOAD(r + 7, _b1); \
V4_REG_LOAD(r + 8, (uint64_t*)(_b1) + 1); \ V4_REG_LOAD(r + 8, (uint64_t*)(_b1) + 1); \
\ \
if (jit) \
(*hp_jitfunc)(r); \
else \
v4_random_math(code, r); \ v4_random_math(code, r); \
\ \
memcpy(t, a, sizeof(uint64_t) * 2); \ memcpy(t, a, sizeof(uint64_t) * 2); \
@ -409,6 +433,9 @@ union cn_slow_hash_state
THREADV uint8_t *hp_state = NULL; THREADV uint8_t *hp_state = NULL;
THREADV int hp_allocated = 0; THREADV int hp_allocated = 0;
THREADV v4_random_math_JIT_func hp_jitfunc = NULL;
THREADV uint8_t *hp_jitfunc_memory = NULL;
THREADV int hp_jitfunc_allocated = 0;
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define cpuid(info,x) __cpuidex(info,x,0) #define cpuid(info,x) __cpuidex(info,x,0)
@ -467,6 +494,31 @@ STATIC INLINE int force_software_aes(void)
return use; return use;
} }
/* Cached JIT decision: -1 = not decided yet, 0 = off, 1 = on. */
volatile int use_v4_jit_flag = -1;

/* Reports whether the CNv4 JIT should be used.
 * On x86-64 the answer is taken from use_v4_jit_flag; while that is still -1
 * it is first derived from the MONERO_USE_CNV4_JIT environment variable:
 * unset, "0" or "no" select 0, any other value selects 1.
 * On every other target the result is always 0. */
STATIC INLINE int use_v4_jit(void)
{
#if defined(__x86_64__)
  if (use_v4_jit_flag == -1)
  {
    const char *setting = getenv("MONERO_USE_CNV4_JIT");
    if (setting != NULL && strcmp(setting, "0") != 0 && strcmp(setting, "no") != 0)
      use_v4_jit_flag = 1;
    else
      use_v4_jit_flag = 0;
  }
  return use_v4_jit_flag;
#else
  return 0;
#endif
}
STATIC INLINE int check_aes_hw(void) STATIC INLINE int check_aes_hw(void)
{ {
int cpuid_results[4]; int cpuid_results[4];
@ -718,6 +770,33 @@ void slow_hash_allocate_state(void)
hp_allocated = 0; hp_allocated = 0;
hp_state = (uint8_t *) malloc(MEMORY); hp_state = (uint8_t *) malloc(MEMORY);
} }
#if defined(_MSC_VER) || defined(__MINGW32__)
hp_jitfunc_memory = (uint8_t *) VirtualAlloc(hp_jitfunc_memory, 4096 + 4095,
MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
#else
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__NetBSD__)
hp_jitfunc_memory = mmap(0, 4096 + 4095, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANON, 0, 0);
#else
hp_jitfunc_memory = mmap(0, 4096 + 4095, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
#endif
if(hp_jitfunc_memory == MAP_FAILED)
hp_jitfunc_memory = NULL;
#endif
hp_jitfunc_allocated = 1;
if (hp_jitfunc_memory == NULL)
{
hp_jitfunc_allocated = 0;
hp_jitfunc_memory = malloc(4096 + 4095);
}
hp_jitfunc = (v4_random_math_JIT_func)((size_t)(hp_jitfunc_memory + 4095) & ~4095);
#if !(defined(_MSC_VER) || defined(__MINGW32__))
mprotect(hp_jitfunc, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
#endif
} }
/** /**
@ -740,8 +819,22 @@ void slow_hash_free_state(void)
#endif #endif
} }
if(!hp_jitfunc_allocated)
free(hp_jitfunc_memory);
else
{
#if defined(_MSC_VER) || defined(__MINGW32__)
VirtualFree(hp_jitfunc_memory, 0, MEM_RELEASE);
#else
munmap(hp_jitfunc_memory, 4096 + 4095);
#endif
}
hp_state = NULL; hp_state = NULL;
hp_allocated = 0; hp_allocated = 0;
hp_jitfunc = NULL;
hp_jitfunc_memory = NULL;
hp_jitfunc_allocated = 0;
} }
/** /**

View File

@ -115,6 +115,9 @@ static const struct {
// version 9 starts from block 1686275, which is on or around the 19th of October, 2018. Fork time finalised on 2018-09-02. // version 9 starts from block 1686275, which is on or around the 19th of October, 2018. Fork time finalised on 2018-09-02.
{ 9, 1686275, 0, 1535889548 }, { 9, 1686275, 0, 1535889548 },
// version 10 starts from block 1788000, which is on or around the 9th of March, 2019. Fork time finalised on 2019-02-10.
{ 10, 1788000, 0, 1549792439 },
}; };
static const uint64_t mainnet_hard_fork_version_1_till = 1009826; static const uint64_t mainnet_hard_fork_version_1_till = 1009826;
@ -139,6 +142,7 @@ static const struct {
{ 7, 1057027, 0, 1512211236 }, { 7, 1057027, 0, 1512211236 },
{ 8, 1057058, 0, 1533211200 }, { 8, 1057058, 0, 1533211200 },
{ 9, 1057778, 0, 1533297600 }, { 9, 1057778, 0, 1533297600 },
{ 10, 1154318, 0, 1550153694 },
}; };
static const uint64_t testnet_hard_fork_version_1_till = 624633; static const uint64_t testnet_hard_fork_version_1_till = 624633;
@ -160,6 +164,7 @@ static const struct {
{ 7, 37000, 0, 1521600000 }, { 7, 37000, 0, 1521600000 },
{ 8, 176456, 0, 1537821770 }, { 8, 176456, 0, 1537821770 },
{ 9, 177176, 0, 1537821771 }, { 9, 177176, 0, 1537821771 },
{ 10, 269000, 0, 1550153694 },
}; };
//------------------------------------------------------------------ //------------------------------------------------------------------

View File

@ -52,3 +52,17 @@ set_property(TARGET cncrypto-tests
add_test( add_test(
NAME cncrypto NAME cncrypto
COMMAND cncrypto-tests "${CMAKE_CURRENT_SOURCE_DIR}/tests.txt") COMMAND cncrypto-tests "${CMAKE_CURRENT_SOURCE_DIR}/tests.txt")
# Standalone test executable built from cnv4-jit.c.
add_executable(cnv4-jit-tests cnv4-jit.c)
target_link_libraries(cnv4-jit-tests
PRIVATE
crypto
common
${EXTRA_LIBRARIES})
set_property(TARGET cnv4-jit-tests
PROPERTY
FOLDER "tests")
# Register the executable with CTest; the two numeric arguments are passed
# through on its command line.
add_test(
NAME cnv4-jit
COMMAND cnv4-jit-tests 1788000 1789000)

119
tests/crypto/cnv4-jit.c Normal file
View File

@ -0,0 +1,119 @@
// Copyright (c) 2019, The Monero Project
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are
// permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of
// conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
// of conditions and the following disclaimer in the documentation and/or other
// materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be
// used to endorse or promote products derived from this software without specific
// prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "crypto/hash-ops.h"
extern volatile int use_v4_jit_flag;
// Hashes the same input at the given height with cn_slow_hash (variant 4)
// twice: first with use_v4_jit_flag set to 0, then with it set to 1.
// Returns the memcmp result of the two 32-byte digests, i.e. 0 when they
// match and non-zero when they differ. use_v4_jit_flag is left at 1.
static int test(const uint8_t *data, size_t len, uint64_t height)
{
  char digest[2][32];
  for (int jit = 0; jit < 2; ++jit)
  {
    use_v4_jit_flag = jit;
    cn_slow_hash(data, len, digest[jit], 4, 0, height);
  }
  return memcmp(digest[0], digest[1], sizeof(digest[0]));
}
// Parses a non-negative decimal block height from a command-line argument.
// Returns 0 and stores the value in *out on success. Returns -1 if the
// argument does not start with a digit, has trailing characters, or does not
// fit (strtoull reports a range error through errno).
static int parse_height(const char *arg, uint64_t *out)
{
  if (arg[0] < '0' || arg[0] > '9')
    return -1;

  char *end = NULL;
  errno = 0;
  const unsigned long long value = strtoull(arg, &end, 10);
  if (errno != 0 || end == arg || *end != '\0')
    return -1;

  *out = value;
  return 0;
}

// Compares JIT and non-JIT hashing results via test().
//
// usage: <program> [<start_height> [<end_height>]]
// - With start_height != end_height: hashes a fixed 64-byte input (0x42
//   bytes) once per height in [start_height, end_height).
// - With start_height == end_height (including when only start_height is
//   given): loops forever at that height over inputs derived from an
//   increasing counter, stopping only on a mismatch.
//
// Returns 0 on success, 1 on bad arguments or on the first mismatch, so that
// a failing comparison is visible to the test runner.
int main(int argc, char **argv)
{
  uint8_t data[64] = {0};
  uint64_t start_height = 1788000;
  uint64_t end_height = 1788001;

  if (argc < 1 || argc > 3)
  {
    fprintf(stderr, "usage: %s [<start_height> [<end_height>]]\n", argv[0]);
    return 1;
  }

  if (argc > 1)
  {
    if (parse_height(argv[1], &start_height) != 0)
    {
      fprintf(stderr, "invalid start_height\n");
      return 1;
    }
    end_height = start_height;
    if (argc > 2 && parse_height(argv[2], &end_height) != 0)
    {
      fprintf(stderr, "invalid end_height\n");
      return 1;
    }
  }

  if (start_height == end_height)
  {
    for (uint64_t counter = 0;; ++counter)
    {
      printf("\r%llu", (unsigned long long)counter);
      fflush(stdout);

      // Tile the counter across the whole buffer, including the last 8 bytes.
      for (size_t offset = 0; offset + sizeof(counter) <= sizeof(data); offset += sizeof(counter))
        memcpy(data + offset, &counter, sizeof(counter));

      if (test(data, sizeof(data), start_height))
      {
        fprintf(stderr, "\nFailure at height %llu, counter %llu\n", (unsigned long long)start_height, (unsigned long long)counter);
        return 1;
      }
    }
  }

  memset(data, 0x42, sizeof(data));
  for (uint64_t h = start_height; h < end_height; ++h)
  {
    printf("\r%llu/%llu", (unsigned long long)(h-start_height), (unsigned long long)(end_height-start_height));
    fflush(stdout);
    if (test(data, sizeof(data), h))
    {
      fprintf(stderr, "\nFailure at height %llu\n", (unsigned long long)h);
      return 1;
    }
  }

  printf("\r");
  return 0;
}

View File

@ -34,6 +34,7 @@
#include "crypto/crypto.h" #include "crypto/crypto.h"
#include "cryptonote_basic/cryptonote_basic.h" #include "cryptonote_basic/cryptonote_basic.h"
template<unsigned int variant>
class test_cn_slow_hash class test_cn_slow_hash
{ {
public: public:
@ -42,18 +43,15 @@ public:
#pragma pack(push, 1) #pragma pack(push, 1)
struct data_t struct data_t
{ {
char data[13]; char data[43];
}; };
#pragma pack(pop) #pragma pack(pop)
static_assert(13 == sizeof(data_t), "Invalid structure size"); static_assert(43 == sizeof(data_t), "Invalid structure size");
bool init() bool init()
{ {
if (!epee::string_tools::hex_to_pod("63617665617420656d70746f72", m_data)) if (!epee::string_tools::hex_to_pod("63617665617420656d70746f763617665617420656d70746f72263617665617420656d70746f7201020304", m_data))
return false;
if (!epee::string_tools::hex_to_pod("bbec2cacf69866a8e740380fe7b818fc78f8571221742d729d9d02d7f8989b87", m_expected_hash))
return false; return false;
return true; return true;
@ -62,11 +60,10 @@ public:
bool test() bool test()
{ {
crypto::hash hash; crypto::hash hash;
crypto::cn_slow_hash(&m_data, sizeof(m_data), hash); crypto::cn_slow_hash(&m_data, sizeof(m_data), hash, variant);
return hash == m_expected_hash; return true;
} }
private: private:
data_t m_data; data_t m_data;
crypto::hash m_expected_hash;
}; };

View File

@ -193,7 +193,10 @@ int main(int argc, char** argv)
TEST_PERFORMANCE2(filter, p, test_wallet2_expand_subaddresses, 50, 200); TEST_PERFORMANCE2(filter, p, test_wallet2_expand_subaddresses, 50, 200);
TEST_PERFORMANCE0(filter, p, test_cn_slow_hash); TEST_PERFORMANCE1(filter, p, test_cn_slow_hash, 0);
TEST_PERFORMANCE1(filter, p, test_cn_slow_hash, 1);
TEST_PERFORMANCE1(filter, p, test_cn_slow_hash, 2);
TEST_PERFORMANCE1(filter, p, test_cn_slow_hash, 4);
TEST_PERFORMANCE1(filter, p, test_cn_fast_hash, 32); TEST_PERFORMANCE1(filter, p, test_cn_fast_hash, 32);
TEST_PERFORMANCE1(filter, p, test_cn_fast_hash, 16384); TEST_PERFORMANCE1(filter, p, test_cn_fast_hash, 16384);