diff --git a/src/crypto/slow-hash.c b/src/crypto/slow-hash.c
index 53628ab18..38aeeee54 100644
--- a/src/crypto/slow-hash.c
+++ b/src/crypto/slow-hash.c
@@ -51,6 +51,12 @@
 #define INIT_SIZE_BLK 8
 #define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
 
+#if defined(_MSC_VER)
+#define THREADV __declspec(thread)
+#else
+#define THREADV __thread
+#endif
+
 extern void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
 extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
 
@@ -459,12 +465,6 @@ static inline int force_software_aes(void)
   _b1 = _b; \
   _b = _c; \
 
-#if defined(_MSC_VER)
-#define THREADV __declspec(thread)
-#else
-#define THREADV __thread
-#endif
-
 #pragma pack(push, 1)
 union cn_slow_hash_state
 {
@@ -1012,6 +1012,44 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
 }
 
 #elif !defined NO_AES && (defined(__arm__) || defined(__aarch64__))
+#ifdef __aarch64__
+#include <sys/mman.h>
+THREADV uint8_t *hp_state = NULL;
+THREADV int hp_malloced = 0;
+
+void cn_slow_hash_allocate_state(void)
+{
+    if(hp_state != NULL)
+        return;
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0
+#endif
+    hp_state = mmap(0, MEMORY, PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0);
+
+    if(hp_state == MAP_FAILED)
+        hp_state = NULL;
+    if(hp_state == NULL)
+    {
+        hp_malloced = 1;
+        hp_state = (uint8_t *) malloc(MEMORY);
+    }
+}
+
+void cn_slow_hash_free_state(void)
+{
+    if(hp_state == NULL)
+        return;
+
+    if (hp_malloced)
+        free(hp_state);
+    else
+        munmap(hp_state, MEMORY);
+    hp_state = NULL;
+    hp_malloced = 0;
+}
+#else
 void cn_slow_hash_allocate_state(void)
 {
   // Do nothing, this is just to maintain compatibility with the upgraded slow-hash.c
@@ -1023,6 +1061,7 @@ void cn_slow_hash_free_state(void)
   // As above
   return;
 }
+#endif
 
 #if defined(__GNUC__)
 #define RDATA_ALIGN16 __attribute__ ((aligned(16)))
@@ -1272,12 +1311,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
 {
     RDATA_ALIGN16 uint8_t expandedKey[240];
 
-#ifndef FORCE_USE_HEAP
-    RDATA_ALIGN16 uint8_t local_hp_state[MEMORY];
-#else
-    uint8_t *local_hp_state = (uint8_t *)aligned_malloc(MEMORY,16);
-#endif
-
+    uint8_t *local_hp_state;
     uint8_t text[INIT_SIZE_BYTE];
     RDATA_ALIGN16 uint64_t a[2];
     RDATA_ALIGN16 uint64_t b[4];
@@ -1296,6 +1330,13 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
         hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
     };
 
+    // this isn't supposed to happen, but guard against it for now.
+    if(hp_state == NULL)
+        cn_slow_hash_allocate_state();
+
+    // locals to avoid constant TLS dereferencing
+    local_hp_state = hp_state;
+
     /* CryptoNight Step 1:  Use Keccak1600 to initialize the 'state' (and 'text') buffers from the data. */
     if (prehashed) {
         memcpy(&state.hs, data, length);
@@ -1409,10 +1450,6 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
     memcpy(state.init, text, INIT_SIZE_BYTE);
     hash_permutation(&state.hs);
     extra_hashes[state.hs.b[0] & 3](&state, 200, hash);
-
-#ifdef FORCE_USE_HEAP
-    aligned_free(local_hp_state);
-#endif
 }
 
 #else /* aarch64 && crypto */
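
Note for reviewers: the patch moves the aarch64 scratchpad from a per-call stack/heap buffer to a thread-local (`THREADV`) buffer that callers allocate once per thread. Below is a minimal caller-side sketch of that lifecycle, assuming a worker-thread setup; `worker_thread`, its loop bounds, and the input data are hypothetical, while the three `cn_slow_hash_*` declarations mirror the functions this patch defines in slow-hash.c.

```c
#include <stddef.h>

/* Declarations matching the functions in slow-hash.c; link against it. */
void cn_slow_hash_allocate_state(void);
void cn_slow_hash_free_state(void);
void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed);

static void *worker_thread(void *arg)
{
    (void)arg;
    char hash[32];
    const char data[] = "example input bytes";  /* hypothetical input */

    /* hp_state is THREADV (thread-local), so each worker owns its own
     * scratchpad: allocate once, hash many times, free on thread exit. */
    cn_slow_hash_allocate_state();

    for (int i = 0; i < 1000; i++)
        cn_slow_hash(data, sizeof(data) - 1, hash, 0 /* variant */, 0 /* prehashed */);

    cn_slow_hash_free_state();
    return NULL;
}
```

The `local_hp_state = hp_state;` copy inside `cn_slow_hash` follows the same reasoning as its comment: reading a `__thread` variable can involve a TLS lookup on every access, so caching the pointer in a local lets the compiler keep it in a register through the hot loop.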
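
For anyone wanting to check the huge-page path on a target box, here is a standalone probe that mirrors the patch's attempt order (hugepage-backed anonymous mapping first, plain `malloc` on failure). The `MEMORY` value is an assumption here, matching the 2 MiB scratchpad defined earlier in slow-hash.c.

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define MEMORY (1 << 21)  /* assumption: 2 MiB, as in slow-hash.c */

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0     /* same no-op fallback the patch uses */
#endif

int main(void)
{
    void *p = mmap(0, MEMORY, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON | MAP_HUGETLB, -1, 0);
    if (p == MAP_FAILED)
    {
        /* No huge pages reserved (or no kernel support): the heap
         * fallback still works, just without the TLB-miss savings. */
        printf("hugepage mmap failed; using malloc fallback\n");
        p = malloc(MEMORY);
        if (p == NULL)
            return 1;
        free(p);
    }
    else
    {
        printf("got a hugepage-backed mapping\n");
        munmap(p, MEMORY);
    }
    return 0;
}
```

On Linux this typically reports the fallback path unless huge pages have been reserved (e.g. via `/proc/sys/vm/nr_hugepages`), which is why the patch degrades to `malloc` silently rather than treating the mmap failure as an error.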