Merge pull request #4781

2a48c2a2 slow-hash: some more big endian fixes (xiphon)
b39fdf8e slow-hash: fix for big endian (moneromooo-monero)

commit 84dd674cd0
@@ -109,8 +109,8 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
 memcpy(b + AES_BLOCK_SIZE, state.hs.b + 64, AES_BLOCK_SIZE); \
 xor64(b + AES_BLOCK_SIZE, state.hs.b + 80); \
 xor64(b + AES_BLOCK_SIZE + 8, state.hs.b + 88); \
-division_result = state.hs.w[12]; \
-sqrt_result = state.hs.w[13]; \
+division_result = SWAP64LE(state.hs.w[12]); \
+sqrt_result = SWAP64LE(state.hs.w[13]); \
 } while (0)

 #define VARIANT2_SHUFFLE_ADD_SSE2(base_ptr, offset) \
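The two state words used to seed division_result and sqrt_result (state.hs.w[12] and state.hs.w[13]) are treated as little-endian data, so they now pass through SWAP64LE before being used as host integers. A minimal sketch of what SWAP64LE is assumed to do, following the usual int-util.h pattern (identity on little-endian hosts, full byte swap on big-endian hosts); the swap64 helper and the byte-order test below are illustrative, not the exact upstream definitions:

    #include <stdint.h>

    /* Illustrative 64-bit byte reversal. */
    static inline uint64_t swap64(uint64_t x)
    {
      x = ((x & 0x00ff00ff00ff00ffULL) << 8)  | ((x & 0xff00ff00ff00ff00ULL) >> 8);
      x = ((x & 0x0000ffff0000ffffULL) << 16) | ((x & 0xffff0000ffff0000ULL) >> 16);
      return (x << 32) | (x >> 32);
    }

    /* Assumed semantics of SWAP64LE: convert between a little-endian
     * encoding and host byte order.  A no-op on little-endian machines,
     * a byte swap on big-endian ones, so division_result and sqrt_result
     * hold host-order integers on every platform. */
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    #define SWAP64LE(x) swap64(x)
    #else
    #define SWAP64LE(x) (x)
    #endif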
@@ -145,30 +145,31 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
 const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \
 \
 uint64_t b1[2]; \
-memcpy(b1, b + 16, 16); \
-chunk1[0] = chunk3[0] + b1[0]; \
-chunk1[1] = chunk3[1] + b1[1]; \
+memcpy_swap64le(b1, b + 16, 2); \
+chunk1[0] = SWAP64LE(SWAP64LE(chunk3[0]) + b1[0]); \
+chunk1[1] = SWAP64LE(SWAP64LE(chunk3[1]) + b1[1]); \
 \
 uint64_t a0[2]; \
-memcpy(a0, a, 16); \
-chunk3[0] = chunk2[0] + a0[0]; \
-chunk3[1] = chunk2[1] + a0[1]; \
+memcpy_swap64le(a0, a, 2); \
+chunk3[0] = SWAP64LE(SWAP64LE(chunk2[0]) + a0[0]); \
+chunk3[1] = SWAP64LE(SWAP64LE(chunk2[1]) + a0[1]); \
 \
 uint64_t b0[2]; \
-memcpy(b0, b, 16); \
-chunk2[0] = chunk1_old[0] + b0[0]; \
-chunk2[1] = chunk1_old[1] + b0[1]; \
+memcpy_swap64le(b0, b, 2); \
+chunk2[0] = SWAP64LE(SWAP64LE(chunk1_old[0]) + b0[0]); \
+chunk2[1] = SWAP64LE(SWAP64LE(chunk1_old[1]) + b0[1]); \
 } while (0)

 #define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \
-((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \
+uint64_t tmpx = division_result ^ (sqrt_result << 32); \
+((uint64_t*)(b))[0] ^= SWAP64LE(tmpx); \
 { \
-const uint64_t dividend = ((uint64_t*)(ptr))[1]; \
-const uint32_t divisor = (((uint64_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
+const uint64_t dividend = SWAP64LE(((uint64_t*)(ptr))[1]); \
+const uint32_t divisor = (SWAP64LE(((uint64_t*)(ptr))[0]) + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \
 division_result = ((uint32_t)(dividend / divisor)) + \
 (((uint64_t)(dividend % divisor)) << 32); \
 } \
-const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result
+const uint64_t sqrt_input = SWAP64LE(((uint64_t*)(ptr))[0]) + division_result

 #define VARIANT2_INTEGER_MATH_SSE2(b, ptr) \
 do if (variant >= 2) \
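The shuffle now loads a and b with memcpy_swap64le instead of a raw memcpy, and each SWAP64LE(SWAP64LE(chunk) + addend) decodes the stored word, adds in host order, and re-encodes the result, so the scratchpad stays little-endian in memory while the arithmetic happens on host-order integers. A minimal sketch of what memcpy_swap64le is assumed to do, with the word count (not byte count) as its third argument; the loop is illustrative rather than the exact upstream helper:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Assumed semantics: copy n 64-bit words from src to dst, converting
     * each word between little-endian storage and host byte order.
     * SWAP64LE is the macro sketched above. */
    static inline void memcpy_swap64le(void *dst, const void *src, size_t n)
    {
      size_t i;
      for (i = 0; i < n; i++)
      {
        uint64_t w;
        memcpy(&w, (const uint8_t*)src + 8 * i, 8); /* unaligned-safe load */
        w = SWAP64LE(w);
        memcpy((uint8_t*)dst + 8 * i, &w, 8);
      }
    }

On a little-endian build this degenerates to the original 16-byte copy for the two-word case used here; on big-endian builds it is what makes the chunk arithmetic match the little-endian scratchpad layout.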
@@ -207,10 +208,10 @@ extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *ex
 #define VARIANT2_2() \
 do if (variant >= 2) \
 { \
-*U64(hp_state + (j ^ 0x10)) ^= hi; \
-*(U64(hp_state + (j ^ 0x10)) + 1) ^= lo; \
-hi ^= *U64(hp_state + (j ^ 0x20)); \
-lo ^= *(U64(hp_state + (j ^ 0x20)) + 1); \
+*U64(hp_state + (j ^ 0x10)) ^= SWAP64LE(hi); \
+*(U64(hp_state + (j ^ 0x10)) + 1) ^= SWAP64LE(lo); \
+hi ^= SWAP64LE(*U64(hp_state + (j ^ 0x20))); \
+lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \
 } while (0)


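The same convention applies here: hi and lo hold host-order results of the 64x64 multiply, while hp_state is a byte buffer whose 64-bit words the algorithm defines as little-endian, so every read or write goes through SWAP64LE. U64 is assumed to be a plain cast in this file; the snippet below restates the access pattern against a local buffer with made-up values, reusing the SWAP64LE sketch above:

    #include <stdint.h>

    #define U64(x) ((uint64_t *) (x))     /* assumed shape of the helper macro */

    int main(void)
    {
      uint64_t scratch[2] = {0};            /* stand-in for one scratchpad chunk  */
      uint64_t hi = 0x0123456789abcdefULL;  /* host-order multiply result         */

      *U64(scratch) ^= SWAP64LE(hi);        /* re-encode before touching memory   */
      hi ^= SWAP64LE(*U64(scratch));        /* decode again after reading memory  */
      return (int)(hi & 1);
    }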
@@ -1408,7 +1409,7 @@ static void (*const extra_hashes[4])(const void *, size_t, char *) = {
 hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
 };

-static size_t e2i(const uint8_t* a, size_t count) { return (*((uint64_t*)a) / AES_BLOCK_SIZE) & (count - 1); }
+static size_t e2i(const uint8_t* a, size_t count) { return (SWAP64LE(*((uint64_t*)a)) / AES_BLOCK_SIZE) & (count - 1); }

 static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
 uint64_t a0, b0;
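e2i turns the first 8 bytes of an AES block into a scratchpad index. Those bytes are little-endian data, so on a big-endian host the raw dereference would divide and mask a byte-reversed value and pick the wrong block, which is the class of bug this pull request fixes. A small self-checking illustration, reusing the SWAP64LE sketch above; the byte values and scratchpad size are made up:

    #include <stdint.h>
    #include <string.h>

    #define AES_BLOCK_SIZE 16
    #define TOTAL_BLOCKS   (1 << 17)   /* illustrative scratchpad size in blocks */

    int main(void)
    {
      /* 8 bytes as they sit in memory, meant to be read little-endian. */
      const uint8_t a[8] = { 0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, 0x01 };

      uint64_t raw;
      memcpy(&raw, a, 8);

      size_t host_order    = (size_t)((raw / AES_BLOCK_SIZE) & (TOTAL_BLOCKS - 1));
      size_t little_endian = (size_t)((SWAP64LE(raw) / AES_BLOCK_SIZE) & (TOTAL_BLOCKS - 1));

      /* Equal on little-endian hosts, different on big-endian hosts. */
      return host_order == little_endian ? 0 : 1;
    }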