Optimized keccak implementation
All tests were conducted on the same PC (Ryzen 5 5600X running at a fixed 4.65 GHz). Before: test_cn_fast_hash<32> (100000 calls) - OK: 1 us/call; test_cn_fast_hash<16384> (1000 calls) - OK: 164 us/call. After: test_cn_fast_hash<32> (100000 calls) - OK: 0 us/call; test_cn_fast_hash<16384> (1000 calls) - OK: 31 us/call. This is a more than 5x speedup for cn_fast_hash. A consistent 1-2% improvement in the test_construct_tx results was also observed.
This commit is contained in:
parent
f49fc9b487
commit
268a0393e9
|
@ -31,54 +31,83 @@ const uint64_t keccakf_rndc[24] =
|
||||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||||
};
|
};
|
||||||
|
|
||||||
const int keccakf_rotc[24] =
|
|
||||||
{
|
|
||||||
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
|
|
||||||
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
|
|
||||||
};
|
|
||||||
|
|
||||||
const int keccakf_piln[24] =
|
|
||||||
{
|
|
||||||
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
|
|
||||||
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
|
|
||||||
};
|
|
||||||
|
|
||||||
// update the state with given number of rounds
|
// update the state with given number of rounds
|
||||||
|
|
||||||
void keccakf(uint64_t st[25], int rounds)
|
void keccakf(uint64_t st[25], int rounds)
|
||||||
{
|
{
|
||||||
int i, j, round;
|
int round;
|
||||||
uint64_t t, bc[5];
|
uint64_t t, bc[5];
|
||||||
|
|
||||||
for (round = 0; round < rounds; round++) {
|
for (round = 0; round < rounds; ++round) {
|
||||||
|
|
||||||
// Theta
|
// Theta
|
||||||
for (i = 0; i < 5; i++)
|
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
|
||||||
bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
|
bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
|
||||||
|
bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
|
||||||
|
bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
|
||||||
|
bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
|
||||||
|
|
||||||
for (i = 0; i < 5; i++) {
|
#define THETA(i) { \
|
||||||
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
|
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \
|
||||||
for (j = 0; j < 25; j += 5)
|
st[i ] ^= t; \
|
||||||
st[j + i] ^= t;
|
st[i + 5] ^= t; \
|
||||||
|
st[i + 10] ^= t; \
|
||||||
|
st[i + 15] ^= t; \
|
||||||
|
st[i + 20] ^= t; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
THETA(0);
|
||||||
|
THETA(1);
|
||||||
|
THETA(2);
|
||||||
|
THETA(3);
|
||||||
|
THETA(4);
|
||||||
|
|
||||||
// Rho Pi
|
// Rho Pi
|
||||||
t = st[1];
|
t = st[1];
|
||||||
for (i = 0; i < 24; i++) {
|
st[ 1] = ROTL64(st[ 6], 44);
|
||||||
j = keccakf_piln[i];
|
st[ 6] = ROTL64(st[ 9], 20);
|
||||||
bc[0] = st[j];
|
st[ 9] = ROTL64(st[22], 61);
|
||||||
st[j] = ROTL64(t, keccakf_rotc[i]);
|
st[22] = ROTL64(st[14], 39);
|
||||||
t = bc[0];
|
st[14] = ROTL64(st[20], 18);
|
||||||
}
|
st[20] = ROTL64(st[ 2], 62);
|
||||||
|
st[ 2] = ROTL64(st[12], 43);
|
||||||
|
st[12] = ROTL64(st[13], 25);
|
||||||
|
st[13] = ROTL64(st[19], 8);
|
||||||
|
st[19] = ROTL64(st[23], 56);
|
||||||
|
st[23] = ROTL64(st[15], 41);
|
||||||
|
st[15] = ROTL64(st[ 4], 27);
|
||||||
|
st[ 4] = ROTL64(st[24], 14);
|
||||||
|
st[24] = ROTL64(st[21], 2);
|
||||||
|
st[21] = ROTL64(st[ 8], 55);
|
||||||
|
st[ 8] = ROTL64(st[16], 45);
|
||||||
|
st[16] = ROTL64(st[ 5], 36);
|
||||||
|
st[ 5] = ROTL64(st[ 3], 28);
|
||||||
|
st[ 3] = ROTL64(st[18], 21);
|
||||||
|
st[18] = ROTL64(st[17], 15);
|
||||||
|
st[17] = ROTL64(st[11], 10);
|
||||||
|
st[11] = ROTL64(st[ 7], 6);
|
||||||
|
st[ 7] = ROTL64(st[10], 3);
|
||||||
|
st[10] = ROTL64(t, 1);
|
||||||
|
|
||||||
// Chi
|
// Chi
|
||||||
for (j = 0; j < 25; j += 5) {
|
#define CHI(j) { \
|
||||||
for (i = 0; i < 5; i++)
|
const uint64_t st0 = st[j ]; \
|
||||||
bc[i] = st[j + i];
|
const uint64_t st1 = st[j + 1]; \
|
||||||
for (i = 0; i < 5; i++)
|
const uint64_t st2 = st[j + 2]; \
|
||||||
st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
|
const uint64_t st3 = st[j + 3]; \
|
||||||
|
const uint64_t st4 = st[j + 4]; \
|
||||||
|
st[j ] ^= ~st1 & st2; \
|
||||||
|
st[j + 1] ^= ~st2 & st3; \
|
||||||
|
st[j + 2] ^= ~st3 & st4; \
|
||||||
|
st[j + 3] ^= ~st4 & st0; \
|
||||||
|
st[j + 4] ^= ~st0 & st1; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CHI( 0);
|
||||||
|
CHI( 5);
|
||||||
|
CHI(10);
|
||||||
|
CHI(15);
|
||||||
|
CHI(20);
|
||||||
|
|
||||||
// Iota
|
// Iota
|
||||||
st[0] ^= keccakf_rndc[round];
|
st[0] ^= keccakf_rndc[round];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue