From 5f1b9a23132408160cf0b99e1e162413ab0e382b Mon Sep 17 00:00:00 2001
From: toasteater <48371905+toasteater@users.noreply.github.com>
Date: Mon, 18 Mar 2019 11:41:16 +0800
Subject: [PATCH] Improved uniformity of RandomPCG::randf.

When generating single precision floats, Godot casts a uint32_t to float,
causing uniformity loss.

This new randf, inspired by T. R. Campbell's random_real, samples the output
of rand as the fraction part of an infinite binary number, with some tricks
to reduce ops and branching. This method provides "good enough" uniformity at
decent speed, for floats greater than 2^-64. Smaller numbers are floored to 0.
---
 core/math/random_pcg.cpp |  8 ++----
 core/math/random_pcg.h   | 62 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/core/math/random_pcg.cpp b/core/math/random_pcg.cpp
index 45467b32b2..b82b24ac4d 100644
--- a/core/math/random_pcg.cpp
+++ b/core/math/random_pcg.cpp
@@ -44,13 +44,9 @@ void RandomPCG::randomize() {
 }
 
 double RandomPCG::random(double p_from, double p_to) {
-	unsigned int r = rand();
-	double ret = (double)r / (double)RANDOM_MAX;
-	return (ret) * (p_to - p_from) + p_from;
+	return randd() * (p_to - p_from) + p_from;
 }
 
 float RandomPCG::random(float p_from, float p_to) {
-	unsigned int r = rand();
-	float ret = (float)r / (float)RANDOM_MAX;
-	return (ret) * (p_to - p_from) + p_from;
+	return randf() * (p_to - p_from) + p_from;
 }
diff --git a/core/math/random_pcg.h b/core/math/random_pcg.h
index 230eb9a11b..e48b1b9551 100644
--- a/core/math/random_pcg.h
+++ b/core/math/random_pcg.h
@@ -35,6 +35,28 @@
 
 #include "thirdparty/misc/pcg.h"
 
+#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_clz))
+#define CLZ32(x) __builtin_clz(x)
+#elif defined(_MSC_VER)
+#include "intrin.h"
+static int __bsr_clz32(uint32_t x) {
+	unsigned long index;
+	_BitScanReverse(&index, x);
+	return 31 - index;
+}
+#define CLZ32(x) __bsr_clz32(x)
+#else
+#endif
+
+#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_ldexp) && _llvm_has_builtin(__builtin_ldexpf))
+#define LDEXP(s, e) __builtin_ldexp(s, e)
+#define LDEXPF(s, e) __builtin_ldexpf(s, e)
+#else
+#include "math.h"
+#define LDEXP(s, e) ldexp(s, e)
+#define LDEXPF(s, e) ldexp(s, e)
+#endif
+
 class RandomPCG {
 	pcg32_random_t pcg;
 	uint64_t current_seed; // seed with this to get the same state
@@ -58,8 +80,44 @@ public:
 		current_seed = pcg.state;
 		return pcg32_random_r(&pcg);
 	}
-	_FORCE_INLINE_ double randd() { return (double)rand() / (double)RANDOM_MAX; }
-	_FORCE_INLINE_ float randf() { return (float)rand() / (float)RANDOM_MAX; }
+
+	// Obtaining floating point numbers in [0, 1] range with "good enough" uniformity.
+	// These functions sample the output of rand() as the fraction part of an infinite binary number,
+	// with some tricks applied to reduce ops and branching:
+	// 1. Instead of shifting to the first 1 and connecting random bits, we simply set the MSB and LSB to 1.
+	//    Provided that the RNG is actually uniform bit by bit, this should have the exact same effect.
+	// 2. In order to compensate for exponent info loss, we count zeros from another random number,
+	//    and just add that to the initial offset.
+	//    This has the same probability as counting and shifting an actual bit stream: 2^-n for n zeroes.
+	// For all numbers above 2^-96 (2^-64 for floats), the functions should be uniform.
+	// However, all numbers below that threshold are floored to 0.
+	// The thresholds are chosen to minimize rand() calls while keeping the numbers within a totally subjective quality standard.
+	// If clz or ldexp isn't available, fall back to bit truncation for performance, sacrificing uniformity.
+	_FORCE_INLINE_ double randd() {
+#if defined(CLZ32)
+		uint32_t proto_exp_offset = rand();
+		if (unlikely(proto_exp_offset == 0)) {
+			return 0;
+		}
+		uint64_t significand = (((uint64_t)rand()) << 32) | rand() | 0x8000000000000001U;
+		return LDEXP((double)significand, -64 - CLZ32(proto_exp_offset));
+#else
+#pragma message("RandomPCG::randd - intrinsic clz is not available, falling back to bit truncation")
+		return (double)(((((uint64_t)rand()) << 32) | rand()) & 0x1FFFFFFFFFFFFFU) / (double)0x1FFFFFFFFFFFFFU;
+#endif
+	}
+	_FORCE_INLINE_ float randf() {
+#if defined(CLZ32)
+		uint32_t proto_exp_offset = rand();
+		if (unlikely(proto_exp_offset == 0)) {
+			return 0;
+		}
+		return LDEXPF((float)(rand() | 0x80000001), -32 - CLZ32(proto_exp_offset));
+#else
+#pragma message("RandomPCG::randf - intrinsic clz is not available, falling back to bit truncation")
+		return (float)(rand() & 0xFFFFFF) / (float)0xFFFFFF;
+#endif
+	}
 
 	double random(double p_from, double p_to);
 	float random(float p_from, float p_to);