minor performance improvements for philox
This commit is contained in:
parent
fca42949a3
commit
0904df84e2
|
@ -26,7 +26,7 @@ two_pow32_inv_2pi = np.array([2.3283064e-10 * 6.2831855], dtype=np.float32)
|
||||||
|
|
||||||
def uint32(x):
|
def uint32(x):
|
||||||
"""Converts (N,) np.uint64 array into (2, N) np.uint32 array."""
|
"""Converts (N,) np.uint64 array into (2, N) np.uint32 array."""
|
||||||
return np.moveaxis(x.view(np.uint32).reshape(-1, 2), 0, 1)
|
return x.view(np.uint32).reshape(-1, 2).transpose(1, 0)
|
||||||
|
|
||||||
|
|
||||||
def philox4_round(counter, key):
|
def philox4_round(counter, key):
|
||||||
|
@ -65,8 +65,8 @@ def philox4_32(counter, key, rounds=10):
|
||||||
|
|
||||||
def box_muller(x, y):
|
def box_muller(x, y):
|
||||||
"""Returns just the first of the two numbers generated by the Box–Muller transform."""
|
"""Returns just the first of the two numbers generated by the Box–Muller transform."""
|
||||||
u = x.astype(np.float32) * two_pow32_inv + two_pow32_inv / 2
|
u = x * two_pow32_inv + two_pow32_inv / 2
|
||||||
v = y.astype(np.float32) * two_pow32_inv_2pi + two_pow32_inv_2pi / 2
|
v = y * two_pow32_inv_2pi + two_pow32_inv_2pi / 2
|
||||||
|
|
||||||
s = np.sqrt(-2.0 * np.log(u))
|
s = np.sqrt(-2.0 * np.log(u))
|
||||||
|
|
||||||
|
@ -93,7 +93,9 @@ class Generator:
|
||||||
counter[2] = np.arange(n, dtype=np.uint32) # up to 2^32 numbers can be generated - if you want more you'd need to spill into counter[3]
|
counter[2] = np.arange(n, dtype=np.uint32) # up to 2^32 numbers can be generated - if you want more you'd need to spill into counter[3]
|
||||||
self.offset += 1
|
self.offset += 1
|
||||||
|
|
||||||
key = uint32(np.array([[self.seed] * n], dtype=np.uint64))
|
key = np.empty(n, dtype=np.uint64)
|
||||||
|
key.fill(self.seed)
|
||||||
|
key = uint32(key)
|
||||||
|
|
||||||
g = philox4_32(counter, key)
|
g = philox4_32(counter, key)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue