-
-
Save Hendiadyoin1/f58346d66637deb9156ef360aa158bf9 to your computer and use it in GitHub Desktop.
Some experiments with approximations of the binary logarithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from enum import Enum\n", | |
"from functools import partial\n", | |
"from time import perf_counter_ns\n", | |
"\n", | |
"import struct\n", | |
"import numpy as np\n", | |
"from matplotlib import pyplot as plt\n", | |
"import pandas as pd\n", | |
"\n", | |
"plt.rcParams['xtick.labelsize'] = 'xx-small'\n", | |
"plt.rcParams['axes.labelsize'] = 'xx-small'\n", | |
"plt.rcParams['ytick.labelsize'] = 'xx-small'\n", | |
"plt.rcParams['legend.fontsize'] = 'xx-small'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class ApproximationKind(Enum):\n", | |
" Cordic5 = \"CORDIC algorithm (5 steps)\"\n", | |
" Cordic10 = \"CORDIC algorithm (10 steps)\"\n", | |
" Horner3 = \"Horner Fitted Polynomial (3rd degree)\"\n", | |
" Horner4 = \"Horner form Taylor (4th degree, around 3/2)\"\n", | |
" Horner5 = \"Horner form Taylor (5th degree, around 3/2)\"\n", | |
" Taylor3 = \"Fitted polynomial (3rd degree)\"\n", | |
" Taylor4 = \"Taylor (4th degree, around 3/2)\"\n", | |
" TaylorBen = \"Taylor (4th degree, around 3/2, Ben)\"\n", | |
" CubicBen = \"Cubic Polynomial (Ben)\"\n", | |
" CubicBen2 = \"Cubic Polynomial (Benv2)\"\n", | |
" Wikipedia = \"Wikipedias algorithm\"\n", | |
" IEEE1973 = \"Proceedings of the IEEE, Vol6 - A note on base-2 logarithm computations\"\n", | |
" Sun = \"Sun's Algorithm\"\n", | |
" Sun2 = \"An attempt at bettering Suns Algorithm\"\n", | |
"\n", | |
"# Add approximation kinds to this list to exclude from the graphs below.\n", | |
"ignored_kinds = [ApproximationKind.CubicBen,ApproximationKind.Cordic5, ApproximationKind.Cordic10, ApproximationKind.TaylorBen, ApproximationKind.Wikipedia, ApproximationKind.IEEE1973]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"def float_to_int(f):\n", | |
" bits = struct.pack(\">f\", f)\n", | |
" return struct.unpack(\">i\", bits)[0]\n", | |
"\n", | |
"\n", | |
"def int_to_float(i):\n", | |
" bits = struct.pack(\">l\", i)\n", | |
" return struct.unpack(\">f\", bits)[0]\n", | |
"\n", | |
"\n", | |
"def as_0_dot(i):\n", | |
" if (i == 0):\n", | |
" return 0.\n", | |
" return int_to_float(i | (126 << 23))\n", | |
"\n", | |
"\n", | |
"def mantissa(f_bits):\n", | |
" return f_bits & 0b111_1111_1111_1111_1111_1111\n", | |
"\n", | |
"\n", | |
"def exponent(f_bits):\n", | |
" return (f_bits >> 23) & 0xFF\n", | |
"\n", | |
"\n", | |
"CORDIC_LUT = np.array([\n", | |
" np.arctanh(2**-i) for i in range(0, 20)\n", | |
"])\n", | |
"\n", | |
"\n", | |
"def cordic_log(a, N=5):\n", | |
" x = a + 1\n", | |
" y = a - 1\n", | |
" z = 0\n", | |
" for i in range(1, N+1):\n", | |
" sigma = -1 if y >= 0 else 1\n", | |
" x, y, z = \\\n", | |
" x + sigma * 2.**(-i) * y,\\\n", | |
" y + sigma * 2.**(-i) * x,\\\n", | |
" z - sigma * np.arctanh(2.**(-i))\n", | |
" # hyperbolic cordic does not converge, unless we repeat some iterations\n", | |
" if i % 3 == 1 and i != 1:\n", | |
" sigma = -1 if y >= 0 else 1\n", | |
" x, y, z = \\\n", | |
" x + sigma * 2**(-i) * y,\\\n", | |
" y + sigma * 2**(-i) * x,\\\n", | |
" z - sigma * CORDIC_LUT[i]\n", | |
"\n", | |
" # z requires no scaling factor\n", | |
" return 2 * z\n", | |
"\n", | |
"\n", | |
"def wiki_log(y, N=5):\n", | |
" # https://en.wikipedia.org/wiki/Binary_logarithm\n", | |
" res = 0\n", | |
" factor = 1\n", | |
" for _ in range(N):\n", | |
" # I think we can loosen this to the residual accuracy from the exponent\n", | |
" if y == 1:\n", | |
" return res\n", | |
" # I think this can be LUTed\n", | |
" m = 1\n", | |
" z = y**2\n", | |
" while (z < 2):\n", | |
" z = z**2\n", | |
" m += 1\n", | |
" res += factor * 2**-m\n", | |
" factor *= 2**-m\n", | |
" y = z/2\n", | |
" return res\n", | |
"\n", | |
"\n", | |
"def ieee_log(p, N=16):\n", | |
" # https://ieeexplore.ieee.org/document/1451248 Fig. 1\n", | |
" X = 0.\n", | |
" a = p\n", | |
" d = 1.\n", | |
" for _ in range(N):\n", | |
" d *= 0.5\n", | |
" if a >= np.sqrt(2):\n", | |
" X = X\n", | |
" a = a*a\n", | |
" else:\n", | |
" X = X + d\n", | |
" a = 0.5 * a*a\n", | |
" return X\n", | |
"\n", | |
"\n", | |
"def my_log(x, approximation_kind: ApproximationKind = ApproximationKind.Horner4):\n", | |
" # only for x in [1, 2)\n", | |
" # other wise we need to extract the exponent first and add it later back in\n", | |
" log_approx = 0\n", | |
" match approximation_kind:\n", | |
" case ApproximationKind.Cordic5:\n", | |
" # the CORDIC algorithm\n", | |
" # - gives the natural log, so we need to divide by ln2\n", | |
" # - \"works\" for values until 1.11817 so we need to rescale and offset\n", | |
" log_approx = cordic_log(x/2, N=5) / np.log(2) + 1\n", | |
" case ApproximationKind.Cordic10:\n", | |
" log_approx = cordic_log(x/2, N=10) / np.log(2) + 1\n", | |
" case ApproximationKind.Horner4:\n", | |
" # Taylor 4 - Horner form\n", | |
" log_approx = x * \\\n", | |
" (x * (x * (0.569953 - 0.0712442 * x) - 1.92359) + 3.84718) - 2.42065\n", | |
" case ApproximationKind.Horner5:\n", | |
" # Taylor 5 - Horner form\n", | |
" log_approx = x * (x * (x * ((0.0379969 * x - 0.356221)\n", | |
" * x + 1.42488) - 3.20599) + 4.80898) - 2.70919\n", | |
" case ApproximationKind.Taylor4:\n", | |
" # Taylor 4 expansion around 1.5\n", | |
" # 0.584963 + 0.961797 (x - 1.5) - 0.320599 (x - 1.5)^2 + 0.142488 (x - 1.5)^3 - 0.0712442 (x - 1.5)^4 + 0.0379969 (x - 1.5)^5 + O((x - 1.5)^6)\n", | |
" log_approx = 0.584963 \\\n", | |
" + 0.961797 * (x - 1.5) \\\n", | |
" - 0.320599 * (x - 1.5)**2 \\\n", | |
" + 0.142488 * (x - 1.5)**3 \\\n", | |
" - 0.0712442 * (x - 1.5)**4\n", | |
" case ApproximationKind.TaylorBen:\n", | |
" # Other Taylor 4 expansion @BenW\n", | |
" log_approx = 0.5849625007211562 \\\n", | |
" + 0.9617966939259757 * (x - 1.5) \\\n", | |
" - 0.32059889797532526 * (x - 1.5)**2 \\\n", | |
" + 0.07124419955007229 * (x - 1.5)**3 \\\n", | |
" - 0.011874033258345379 * (x - 1.5)**4\n", | |
" case ApproximationKind.Taylor3:\n", | |
" # Taylor 3\n", | |
" log_approx = -2.56284771631110 + 4.19641546493297 * x - 2.23432594978817 * \\\n", | |
" x**2 + 0.688101329132870 * x**3 - 0.0873431279665709 * x**4\n", | |
" case ApproximationKind.Horner3:\n", | |
" # Taylor 3 - Horner form\n", | |
" log_approx = x * (x * ((0.688101329132870 - 0.0873431279665709 * x)\n", | |
" * x - 2.23432594978817) + 4.19641546493297) - 2.56284771631110\n", | |
" case ApproximationKind.CubicBen:\n", | |
" # cubic polynomial @BenW\n", | |
" log_approx = ((0.1640425613334453 * x - 1.098865286222745)\n", | |
" * x + 3.1482979293341167) * x - 2.2134752044448174\n", | |
" case ApproximationKind.CubicBen2:\n", | |
" # log_approx = 0.158417795204953*x**3 - 1.05624538905285*x**2 + 3.05981160072389*x - 2.16198400687597\n", | |
" log_approx = x * ((0.158417795204953 * x - 1.05624538905285)\n", | |
" * x + 3.05981160072389) - 2.16198400687597\n", | |
" case ApproximationKind.Wikipedia:\n", | |
" log_approx = wiki_log(x)\n", | |
" case ApproximationKind.IEEE1973:\n", | |
" log_approx = ieee_log(x)\n", | |
" case ApproximationKind.Sun:\n", | |
" inverted = False\n", | |
" if x > 1.4142135623730951: # x > 2 ** 0.5\n", | |
" inverted = True\n", | |
" x = 2 / x\n", | |
" s = (x - 1) / (x + 1)\n", | |
" # ln((1 + s) / (1 - s)) == ln(x)\n", | |
" s2 = s ** 2\n", | |
" high_approx = s2 * (0.6666666666666735130 +\n", | |
" s2 * (0.3999999999940941908 +\n", | |
" s2 * (0.2857142874366239149 +\n", | |
" s2 * (0.2222219843214978396 +\n", | |
" s2 * (0.1818357216161805012 +\n", | |
" s2 * (0.1531383769920937332 +\n", | |
" s2 * 0.1479819860511658591))))))\n", | |
" # ln(x) == 2 * s + s * high_approx\n", | |
" # log2(x) == log2(e) * ln(x)\n", | |
" log_approx = (2 * s + s * high_approx) * 1.4426950408889634\n", | |
" if inverted:\n", | |
" log_approx = 1 - log_approx\n", | |
" case ApproximationKind.Sun2:\n", | |
" inverted = False\n", | |
" if x > 1.4142135623730951: # x > 2 ** 0.5\n", | |
" inverted = True\n", | |
" x = 2 / x\n", | |
" s = (x - 1) / (x + 1)\n", | |
" # ln((1 + s) / (1 - s)) == ln(x)\n", | |
" x = s\n", | |
" ln_x = ((((((((((0.20648070736812757) * x + 0.13066699109344074) * x + 0.022150386402575082) * x + 0.28244084277102227) * x + 0.00030798663557106616) * x + 0.3999814902548139) * x + 6.911444214085853e-07) * x + 0.6666666516447267) * x + 1.672133310190813e-10) * x + 1.9999999999992746) * x + 5.199913861897819e-16\n", | |
" # log2(x) == log2(e) * ln(x)\n", | |
" log_approx = ln_x * 1.4426950408889634\n", | |
" if inverted:\n", | |
" log_approx = 1 - log_approx\n", | |
" return log_approx\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"xs = np.linspace(1, 2, 1_000)\n", | |
"ys1 = np.log2(xs)\n", | |
"# plt.plot(xs, ys1, label=\"std\")\n", | |
"\n", | |
"used_kinds = [x for x in ApproximationKind if x not in ignored_kinds]\n", | |
"for kind in used_kinds:\n", | |
"\tys2 = np.vectorize(partial(my_log, approximation_kind=kind))(xs)\n", | |
"\tplt.plot(xs, ys2-ys1, label=kind.value)\n", | |
"\n", | |
"plt.legend(loc=\"upper left\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"xs = np.linspace(1, 2, 1_000_000)\n", | |
"ys1 = np.log2(xs)\n", | |
"\n", | |
"\n", | |
"fig, axs = plt.subplots(3, 2, sharex=True, sharey=True)\n", | |
"axs = list(axs.flatten())\n", | |
"fig.set_dpi(150)\n", | |
"# fig.set_dpi(500)\n", | |
"fig.subplots_adjust(wspace=0.05)\n", | |
"\n", | |
"stat_labels = (\"Absolute Mean Error\", \"Max Abs Error\", \"RMSE\")\n", | |
"df = pd.DataFrame(columns=(\"Kind\" , *stat_labels, 'Duration (ms)'))\n", | |
"stats = []\n", | |
"durations = []\n", | |
"\n", | |
"i = 0\n", | |
"ax = axs.pop()\n", | |
"\n", | |
"for kind in used_kinds:\n", | |
" start = perf_counter_ns()\n", | |
" ys2 = np.vectorize(partial(my_log, approximation_kind=kind))(xs)\n", | |
" duration_ns = perf_counter_ns() - start\n", | |
" diff = ys2 - ys1\n", | |
" ax.plot(xs, diff, label=f\"Error: {kind.value}\")\n", | |
"\n", | |
" stats.append((abs(diff.mean()), abs(diff).max(), np.sqrt((diff ** 2).mean())))\n", | |
" durations.append(duration_ns/1000_000)\n", | |
" (mean, maxabs, rmse), duration = stats[-1], durations[-1]\n", | |
" df = pd.concat(\n", | |
" (df,\n", | |
" pd.DataFrame(({\n", | |
" 'Kind':kind.value,\n", | |
" 'Absolute Mean Error':mean,\n", | |
" 'Max Abs Error':maxabs,\n", | |
" 'RMSE':rmse,\n", | |
" 'Duration (ms)':duration\n", | |
" },))), ignore_index=True)\n", | |
"\n", | |
" i += 1\n", | |
" if i % 1 == 0 and len(axs) != 0:\n", | |
" ax.legend()\n", | |
" ax = axs.pop()\n", | |
"\n", | |
"ax.legend()\n", | |
"\n", | |
"df = df.set_index('Kind')\n", | |
"print(df.to_markdown())#floatfmt=\".19f\"))\n", | |
"\n", | |
"stats = np.array(stats)\n", | |
"width = 0.09\n", | |
"multiplier = -2\n", | |
"x = np.arange(len(stat_labels))\n", | |
"\n", | |
"statfig, stats_ax = plt.subplots()\n", | |
"statfig.set_dpi(150)\n", | |
"duration_ax = stats_ax.twinx()\n", | |
"duration_ax.set_ylabel(\"ms\", color=\"g\")\n", | |
"duration_ax.tick_params(axis='y', labelcolor=\"g\")\n", | |
"stats_ax.set_xticks(np.arange(len(stat_labels)+1) +\n", | |
" width, [*stat_labels, \"Duration\"])\n", | |
"stats_ax.set_yscale('log')\n", | |
"\n", | |
"for kind, kind_stats, duration in zip(used_kinds, stats, durations):\n", | |
" offset = width * multiplier\n", | |
" rects = stats_ax.bar(x + offset, kind_stats, width, label=kind.value)\n", | |
" rects = duration_ax.bar([len(stat_labels) + offset],\n", | |
" [duration], width, label=kind.value)\n", | |
" duration_ax.bar_label(rects, padding=3, fontsize=\"xx-small\", color=\"k\")\n", | |
" multiplier += 1\n", | |
"\n", | |
"stats_ax.legend(loc='upper left')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"xs = np.linspace(1, 2, 1_000)\n", | |
"ys1 = np.log2(xs)\n", | |
"ys2 = np.vectorize(partial(my_log, approximation_kind=ApproximationKind.Sun))(xs)\n", | |
"plt.plot(xs, ys2-ys1, label=ApproximationKind.Sun.value)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.2" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment