Algebra, Straight Lines, and Logarithms Oh My!
By: Lee
Press the Right key to navigate these slides.
setup_html = r'''
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/github-fork-ribbon-css/0.2.0/gh-fork-ribbon.min.css" />
<!--[if lt IE 9]>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/github-fork-ribbon-css/0.2.0/gh-fork-ribbon.ie.min.css" />
<![endif]-->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-2209617-11', 'auto');
ga('send', 'pageview');
</script>
<style>
.github-fork-ribbon::after {line-height: initial !important;padding: initial !important;}
.github-fork-ribbon {font-size: 14px;}
.navigate-up, .navigate-down {
display: none !important;
}
</style>
<script>
$(document).ready(function() {
$("body").append('<a class="github-fork-ribbon" href="http://www.bullshitmath.lol" title="Bullshit Math">Bullshit Math</a>')
});
</script>
'''
IPython.display.display_html(setup_html, raw=True)
hide_code_in_slideshow()
This is actual code used in Quake and other early 3D games to help us calculate $\frac{1}{\sqrt{x}}$, seriously
float rsqrt(float x) {
long i = * ( long * ) &x; // floating point bit hack
i = 0x5f3759df - ( i >> 1 ); // what the fuck?
return * ( float * ) &i;
}
Supposedly,
$$
\textsf{rsqrt}(x) \approx \frac{1}{\sqrt{x}}
$$
and the secret is unlocked behind this impenetrable magical constant 0x5f3759df
.
%matplotlib inline
from struct import pack, unpack
import numpy as np
import matplotlib.pyplot as plt
@np.vectorize
def sharp(x):
return unpack('I', pack('f', x))[0]
@np.vectorize
def flat(y):
return unpack('f', pack('I', int(y) & 0xffffffff))[0]
star_long_star_amp = sharp;
star_float_star_amp = flat;
hide_code_in_slideshow();
@np.vectorize
def rsqrt(x): # float rsqrt(float x) {
i = star_long_star_amp(x); # long i = * ( long * ) &x;
i = 0x5f3759df - ( i >> 1 ); # i = 0x5f3759df - ( i >> 1 );
return star_float_star_amp(i); # return * ( float * ) &i;
# }
# Construct a plot
fig = plt.figure(figsize=(16,8));
ax = plt.axes();
# Plot the approximation and the actual inverse sqrt function
x = np.linspace(1, 50, 5000);
approximation, = ax.plot(x, rsqrt(x))
actual, = ax.plot(x, 1/np.sqrt(x))
fig.legend(handles=[approximation, actual], labels=[r'qsqrt(x)', r'$\frac{1}{\sqrt{x}}$'], fontsize=20);
fig.suptitle(r"$\frac{1}{\sqrt{x}}$ versus qsqrt(x)", fontsize=26);
hide_code_in_slideshow()
0x5f3759df
¶Where did this thing come from?
Should we open up Pandora's Box and peer inside? We are but mere mortals after all.
In fact, I believe that the idea behind this technique is so easy that an $8^{th}$ grader can figure it all out.
Recall that the fast inverse square root method looks like
long i = * ( long * ) &x;
i = 0x5f3759df - ( i >> 1 );
return * ( float * ) &i;
Let's look at those two casts:
*(long*)&
takes a float
and outputs a long
*(float*)&
takes a long
and outputs a float
Let's name these two operations the float-to-long ($\textsf{f2l}: \textsf{float} \to \textsf{long}$) and the long-to-float ($\textsf{l2f}: \textsf{long} \to \textsf{float}$) operators.
$$ \textsf{rsqrt}(x) = \textsf{l2f}\left(\textsf{0x5f3759df} - \frac{1}{2} \textsf{f2l}\left(x\right)\right) $$What would this look like in Python?
from struct import pack, unpack
to_long = lambda hole: unpack('i', hole)[0] # y = (long*) x
to_float = lambda hole: unpack('f', hole)[0] # y = (float*) x
from_long = lambda hole: pack('i', int(hole) % 0x80000000) # long* y = &x
from_float = lambda hole: pack('f', float(hole)) # float* y = &x
hide_code_in_slideshow()
@np.vectorize
def f2l(x):
return to_long(from_float(x))
@np.vectorize
def l2f(y):
return to_float(from_long(y))
Here, $$\begin{align*} \textsf{f2l}(x) &= *(\textsf{long}*) \textsf{&} x \\ \textsf{l2f}(y) &= *(\textrm{float}*) \textsf{&} y \end{align*} $$
Don't worry too much about the implementation details.
What do we know about $\textsf{f2l}$ and $\textsf{l2f}$?
e.g. If you convert a float to a long and then convert that long back to a float, you get the same float back.
e.g. If you convert 0 from a float to a long, it's still 0.
There are a few other algebraic properties of f2l
and l2f
, but unfortunately, they're pretty unstructured. In particular, $x + y \ne \textsf{l2f}(\textsf{f2l}(x) + \textsf{f2l}(y))$:
int( l2f(f2l(1) + f2l(1)) ) # 1 + 1 is ...
170141183460469231731687303715884105728
0x5f3759df
for a moment.rsqrt
, the other constant is the $-\frac{1}{2}$.Since we don't really know what it's doing, let's just name it as if we don't know what we're doing:
$$ \textsf{foobar}_{M,C}(x) = \textsf{l2f}\left(M + C \cdot \textsf{f2l}(x)\right) $$def foobar(M, C):
return np.vectorize(lambda x: l2f(M + C * f2l(x)))
# rsqrt(x) is instantiated with M = 0x5f3759df and C = -1/2
rsqrt = foobar(0x5f3759df, -1.0/2.0)
Recall that the log-log
plot for $y = M \cdot x^C$ is linear because
where $\log(y)$ varies linearly with $\log(x)$ with slope $C$.
What does foobar
looks like under the log-log
scale?
import matplotlib
matplotlib.rcParams['text.usetex'] = False
matplotlib.rcParams['text.latex.unicode'] = False
x = np.linspace(1, 1000, 5000)
allM = (1 << 26, 1 << 28, 0x5f3759df)
properties = {
(0, 0): {'M': allM, 'C': -2},
(1, 0): {'M': allM, 'C': 8},
(0, 1): {'M': allM, 'C': 0.3},
(1, 1): {'M': allM, 'C': -0.6},
}
fig, axarr = plt.subplots(2, 2, figsize=(14,8));
for key, property in properties.items():
C = property['C']
axarr[key].set_ylim(1e-39, 1e41)
handle, = axarr[key].loglog(x, x ** C, linestyle='dotted');
handles = [handle]
for M in property['M']:
baz = foobar(M, C)
kwargs = {'ls' : 'dashed'} if M == 0x5f3759df else {}
handle, = axarr[key].loglog(x, np.abs(baz(x)), **kwargs)
handles.append(handle)
axarr[key].set_title(r'For slope C = $%s$, ${\rm foobar}_{M,%s}(x)$' % (C, C))
axarr[key].legend(
handles,
[
r'$x^{%s}$' % C,
r'$M = 2^{26}$',
r'$M = 2^{28}$',
r'$M = {\rm 0x5f3759df}$'
], loc=4)
hide_code_in_slideshow()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/numpy/lib/function_base.py:2276: RuntimeWarning: invalid value encountered in ? (vectorized) outputs = ufunc(*inputs)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/matplotlib/scale.py:93: RuntimeWarning: invalid value encountered in less_equal mask = a <= 0.0
Notice how as $C$ gets closer to $-\frac{1}{2}$, the 0x5f3759df
line also gets closer to $x^C$.
Honestly, it's hard to grok anything off of a series of pictures, so let's invoke the ancient wisdom of Python and look at a video of this instead.
from IPython.display import HTML
from matplotlib import animation
animation.Animation._repr_html_ = lambda anim: anim.to_html5_video()
x = np.linspace(1, 1000, 5000)
allM = (1 << 26, 1 << 28, 0x5f3759df)
fig = plt.figure(figsize=(14,8))
ax = plt.axes(ylim=(1e-39, 1e41))
def plotSomeMagic(C, fig, ax, handles=None):
if not handles:
handle, = ax.loglog(x, x ** C, linestyle='dotted');
handles = [handle]
for M in allM:
baz = foobar(M, C)
kwargs = {'ls' : 'dashed'} if M == 0x5f3759df else {}
handle, = ax.loglog(x, np.abs(baz(x)), **kwargs)
handles.append(handle)
else:
handles[0].set_data(x, x ** C)
baz = foobar(allM[0], C)
handles[1].set_data(x, np.abs(baz(x)))
baz = foobar(allM[1], C)
handles[2].set_data(x, np.abs(baz(x)))
baz = foobar(allM[2], C)
handles[3].set_data(x, np.abs(baz(x)))
ax.set_title(r'For slope C = $%s$, ${\rm foobar}_{M,%s}(x)$' % (C, C))
ax.legend(
handles,
[
r'$x^{%s}$' % C,
r'$M = 2^{26}$',
r'$M = 2^{28}$',
r'$M = {\rm 0x5f3759df}$'
], loc=4)
return tuple(handles)
handles = plotSomeMagic(0, fig, ax)
# initialization function: plot the background of each frame
def init():
return plotSomeMagic(1, fig, ax, handles)
# animation function. This is called sequentially
def animate(i):
return plotSomeMagic(i, fig, ax, handles)
hide_code_in_slideshow()
video = animation.FuncAnimation(fig, animate, init_func=init, frames=np.arange(-2,8,0.10), interval=100, blit=True)
plt.close();
video
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/numpy/lib/function_base.py:2276: RuntimeWarning: invalid value encountered in ? (vectorized) outputs = ufunc(*inputs) /Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/matplotlib/scale.py:93: RuntimeWarning: invalid value encountered in less_equal mask = a <= 0.0
log-log
equation:log-log
equation isWe know that for each $C$, there's some magic $M^*$ such that $$ \textsf{foobar}_{M^*,C}(x) = \textsf{l2f}\left(M^* + C \cdot \textsf{f2l}(x)\right) \approx x^C $$
How do we find $M^*$? We know that for inverse square-roots, $M^*$ is around 0x5f3759df
.
Question: What happens if you send $x = 1$ into $x^C$?
This is the only crucial insight that we need.
So let's use $x = 1$ as a fixed boundary. If $\textsf{foobar}_{M^*,C}(x)$ is supposed to approximate $x^C$, then we should also expect that $\textsf{foobar}_{M^*,C}(x = 1) = 1^C$.
$$ \textsf{foobar}_{M^*,C}(1) = \textsf{l2f}\left(M^* + C \cdot \textsf{f2l}(1)\right) = 1^C = 1 $$What do we do with that l2f
? Recall that $\textsf{f2l}(\textsf{l2f}(a)) = a$, so let's apply f2l
to both sides to cancel the l2f
out.
Unfortunately, $\textsf{f2l}(1) \ne 1$, but we can subtract the $C \cdot \textsf{f2l}(1)$ from both sides to get
$$ \begin{align*} \left(M^* + C \cdot \textsf{f2l}(1)\right) - C \cdot \textsf{f2l}(1) &= \left(\textsf{f2l}(1)\right) - C \cdot \textsf{f2l}(1) \\ M^* &= \boxed{(1 - C) \cdot \textsf{f2l}(1)} \end{align*} $$This seems to suggest that $\textsf{foobar}_{(1 - C)\textsf{f2l}(1),C}(x) \approx x^C$. How close is this to the truth?
# What is 1#?
display(Latex(r'Just $\textsf{f2l}(1) = \textsf{%s}$.' % hex(f2l(1))))
# What about inverse square-root?
display(Latex(r'For the inverse square-root, its magical constant should be \
$$\left(1 - \frac{-1}{2}\right)\textsf{f2l}(1) = \textsf{%s}$$'
% hex(3 * f2l(1) // 2)))
hide_code_in_slideshow()
Hmm, weren't we expecting 0x5f3759df
instead of 0x5f400000
?
0x5f3759df
is only 0.035% away from 0x5f400000
! Close enough.def qexp(C):
# (1 - C) * f2l(1) + C * f2l(x)
return np.vectorize(lambda x: l2f((1 - C) * f2l(1) + C * f2l(x)))
#define MAGIC 0x3f800000
float qpow(float x, float exponent) {
long i = * ( long * ) &x; // floating point bit hack
i = (1 - exponent) * MAGIC + exponent * i; // what the fuck?
return * ( float * ) &i;
}
x = np.linspace(1, 1000, 5000)
properties = {
(0, 0): {'M': allM, 'C': -1},
(1, 0): {'M': allM, 'C': 2},
(0, 1): {'M': allM, 'C': 0.3},
(1, 1): {'M': allM, 'C': -0.6},
}
fig, axarr = plt.subplots(2, 2, figsize=(14,8));
for key, property in properties.items():
C = property['C']
handle, = axarr[key].plot(x, x ** C);
handles = [handle]
baz = qexp(C)
handle, = axarr[key].plot(x, baz(x))
handles.append(handle)
# axarr[key].set_title(r'For slope C = $%s$, ${\rm foobar}_{M,%s}(x)$' % (C, C))
axarr[key].legend(
handles,
[
r'$x^{%s}$' % C,
r'$M^* = $ %s' % hex(int(C * sharp(1))),
], loc=4)
hide_code_in_slideshow()
Hey, that actually looks pretty good! But what about the errors?
x = np.linspace(1, 1000, 5000)
properties = {
(0, 0): {'C': -1},
(1, 0): {'C': 2},
(0, 1): {'C': 0.3},
(1, 1): {'C': -0.6},
}
fig, axarr = plt.subplots(2, 2, figsize=(14,8));
for key, property in properties.items():
axarr[key].set_ylim(0, 0.5)
axarr[key].yaxis.set_major_formatter(formatter)
C = property['C']
baz = qexp(C)
handle, = axarr[key].plot(x, np.abs(x ** C - baz(x))/(x ** C));
axarr[key].set_title(r'Relative error for $x^{%s}$' % C)
axarr[key].legend(
[handle],
[r'Relative error for $x^{%s}$' % C])
hide_code_in_slideshow()