stdlib: Simplify arc4random_uniform

It uses the bitmask with rejection [1], which calculates a mask
being the lowest power of two bounding the request upper bound,
successively queries new random values, and rejects values
outside the requested range.

Performance-wise, there is no much gain in trying to conserve
bits since arc4random is wrapper on getrandom syscall.  It should
be cheaper to just query a uint32_t value.  The algorithm also
avoids modulo and divide operations, which might be costly
depending of the architecture.

[1] https://www.pcg-random.org/posts/bounded-rands.html

Reviewed-by: Yann Droneaud <ydroneaud@opteya.com>
This commit is contained in:
Adhemerval Zanella 2022-07-28 09:18:01 -03:00
parent 7187efd0aa
commit c622ac1b86
1 changed files with 28 additions and 97 deletions

View File

@ -17,38 +17,19 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <endian.h>
#include <libc-lock.h>
#include <stdlib.h>
#include <sys/param.h>
/* Return the number of bytes which cover values up to the limit. */
__attribute__ ((const))
static uint32_t
byte_count (uint32_t n)
{
if (n < (1U << 8))
return 1;
else if (n < (1U << 16))
return 2;
else if (n < (1U << 24))
return 3;
else
return 4;
}
/* Return a uniformly distributed random number less than N. The algorithm
calculates a mask being the lowest power of two bounding the upper bound
N, successively queries new random values, and rejects values outside of
the request range.
/* Fill the lower bits of the result with randomness, according to the
number of bytes requested. */
static void
random_bytes (uint32_t *result, uint32_t byte_count)
{
*result = 0;
unsigned char *ptr = (unsigned char *) result;
if (__BYTE_ORDER == __BIG_ENDIAN)
ptr += 4 - byte_count;
__arc4random_buf (ptr, byte_count);
}
For reject values, it also tries if the remaining entropy could fit on
the asked range after range adjustment.
The algorithm avoids modulo and divide operations, which might be costly
depending on the architecture. */
uint32_t
__arc4random_uniform (uint32_t n)
{
@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n)
only possible result for limit 1. */
return 0;
/* The bits variable serves as a source for bits. Prefetch the
minimum number of bytes needed. */
uint32_t count = byte_count (n);
uint32_t bits_length = count * CHAR_BIT;
uint32_t bits;
random_bytes (&bits, count);
/* Powers of two are easy. */
if (powerof2 (n))
return bits & (n - 1);
return __arc4random () & (n - 1);
/* The general case. This algorithm follows Jérémie Lumbroso,
Optimal Discrete Uniform Generation from Coin Flips, and
Applications (2013), who credits Donald E. Knuth and Andrew
C. Yao, The complexity of nonuniform random number generation
(1976), for solving the general case.
/* mask is the smallest power of 2 minus 1 number larger than n. */
int z = __builtin_clz (n);
uint32_t mask = ~UINT32_C(0) >> z;
int bits = CHAR_BIT * sizeof (uint32_t) - z;
The implementation below unrolls the initialization stage of the
loop, where v is less than n. */
/* Use 64-bit variables even though the intermediate results are
never larger than 33 bits. This ensures the code is easier to
compile on 64-bit architectures. */
uint64_t v;
uint64_t c;
/* Initialize v and c. v is the smallest power of 2 which is larger
than n.*/
{
uint32_t log2p1 = 32 - __builtin_clz (n);
v = 1ULL << log2p1;
c = bits & (v - 1);
bits >>= log2p1;
bits_length -= log2p1;
}
/* At the start of the loop, c is uniformly distributed within the
half-open interval [0, v), and v < 2n < 2**33. */
while (true)
while (1)
{
if (v >= n)
{
/* If the candidate is less than n, accept it. */
if (c < n)
/* c is uniformly distributed on [0, n). */
return c;
else
{
/* c is uniformly distributed on [n, v). */
v -= n;
c -= n;
/* The distribution was shifted, so c is uniformly
distributed on [0, v) again. */
}
}
/* v < n here. */
uint32_t value = __arc4random ();
/* Replenish the bit source if necessary. */
if (bits_length == 0)
{
/* Overwrite the least significant byte. */
random_bytes (&bits, 1);
bits_length = CHAR_BIT;
}
/* Return if the lower power of 2 minus 1 satisfy the condition. */
uint32_t r = value & mask;
if (r < n)
return r;
/* Double the range. No overflow because v < n < 2**32. */
v *= 2;
/* v < 2n here. */
/* Extract a bit and append it to c. c remains less than v and
thus 2**33. */
c = (c << 1) | (bits & 1);
bits >>= 1;
--bits_length;
/* At this point, c is uniformly distributed on [0, v) again,
and v < 2n < 2**33. */
/* Otherwise check if remaining bits of entropy provides fits in the
bound. */
for (int bits_left = z; bits_left >= bits; bits_left -= bits)
{
value >>= bits;
r = value & mask;
if (r < n)
return r;
}
}
}
libc_hidden_def (__arc4random_uniform)