Skip to content

Instantly share code, notes, and snippets.

@hellman
Created May 7, 2024 11:58
Show Gist options
  • Save hellman/09733f57e7d0ebee6a22fa9c834b0954 to your computer and use it in GitHub Desktop.
Save hellman/09733f57e7d0ebee6a22fa9c834b0954 to your computer and use it in GitHub Desktop.
Extending CRC32
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "589fb75f-e6c1-4e1e-94f5-eb0e03e2c537",
"metadata": {},
"source": [
"# Defining CRC32"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d2d16187-054c-4f5d-a5d9-81df84cff1a7",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:13.705489Z",
"iopub.status.busy": "2024-05-07T11:56:13.704415Z",
"iopub.status.idle": "2024-05-07T11:56:13.719273Z",
"shell.execute_reply": "2024-05-07T11:56:13.716939Z",
"shell.execute_reply.started": "2024-05-07T11:56:13.705354Z"
}
},
"outputs": [],
"source": [
"POLY = 0xEDB8_8320\n",
"# ONE = 0x8000_0000 # 1\n",
"# TWO = 0x4000_0000 # x\n",
"# THREE = 0xC000_0000 # x + 1\n",
"# THREE_INV = 0x492f023f # power(0xc000_0000, 2**32-2)\n",
"MAGIC = 0xB6D0_FDC0 # mul(POLY, THREE_INV) = x^32 / (x+1)\n",
"\n",
"def mul_x(a):\n",
" # LSB\n",
" if a & 1:\n",
" a >>= 1\n",
" a ^= POLY\n",
" else:\n",
" a >>= 1\n",
" return a\n",
"\n",
"\n",
"def mul(a, b):\n",
" res = 0\n",
" while b:\n",
" if b & 0x8000_0000:\n",
" res ^= a\n",
" b ^= 0x8000_0000\n",
" a = mul_x(a)\n",
" b <<= 1\n",
" return res\n",
"\n",
"\n",
"def power(a, e):\n",
" # square and multiply\n",
" res = 0x8000_0000 # one\n",
" while e:\n",
" if e & 1:\n",
" res = mul(res, a)\n",
" e >>= 1\n",
" a = mul(a, a)\n",
" return res \n",
" \n",
"\n",
"def mycrc32(msg):\n",
" s = 0xffff_ffff\n",
" for byte in msg:\n",
" for bit in f\"{byte:08b}\"[::-1]:\n",
" # bit plays the role of x^31 since it's in LSB\n",
" s = mul_x(s ^ int(bit))\n",
" s ^= 0xffff_ffff\n",
" return s\n",
"\n",
"\n",
"def extend_zeros(crc, n): # n in bits\n",
" crc ^= 0xffff_ffff\n",
" crc = mul(crc, power(0x4000_0000, n))\n",
" crc ^= 0xffff_ffff\n",
" return crc\n",
"\n",
"\n",
"def extend_ones(crc, n): # n in bits\n",
" \"\"\"\n",
" We are repeating the step\n",
"\n",
" > c = (c + x^31) * x = c*x + x^32\n",
"\n",
" `n` times, getting\n",
"\n",
" c = c0 * x^n + x^32 * (x^(n-1) + x^(n-2) + ... + x^2 + x + 1)\n",
" = c0 * x^n + x^32 * (x^n - 1) / (x - 1)\n",
"\n",
" Letting\n",
" \n",
" MAGIC = x^32 / (x-1) = POLY / (x-1) = POLY * (x-1)^(2^32-2)\n",
"\n",
" We get\n",
"\n",
" c = c0 * x^n + MAGIC * (x^n-1)\n",
" = x^n * (c0 + MAGIC) - MAGIC\n",
"\n",
" This also means that adding MAGIC before and after digesting some text\n",
" corresponds to flipping all the bits in the text.\n",
" \"\"\"\n",
" crc ^= 0xffff_ffff\n",
" crc = mul(crc ^ MAGIC, power(0x4000_0000, n)) ^ MAGIC\n",
" crc ^= 0xffff_ffff\n",
" return crc"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c4b3acee-feac-4299-8b90-3b1d7d926968",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:13.730593Z",
"iopub.status.busy": "2024-05-07T11:56:13.727921Z",
"iopub.status.idle": "2024-05-07T11:56:13.749154Z",
"shell.execute_reply": "2024-05-07T11:56:13.747708Z",
"shell.execute_reply.started": "2024-05-07T11:56:13.730496Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" zlib my | msg\n",
"00000000 00000000 | b''\n",
"d202ef8d d202ef8d | b'\\x00'\n",
"a505df1b a505df1b | b'\\x01'\n",
"2144df1c 2144df1c | b'\\x00\\x00\\x00\\x00'\n",
"ed82cd11 ed82cd11 | b'abcd'\n",
"8587d865 8587d865 | b'abcde'\n"
]
}
],
"source": [
"import zlib\n",
"\n",
"print(\" zlib my | msg\")\n",
"for m in [b\"\", b\"\\x00\", b\"\\x01\", b\"\\x00\"*4, b\"abcd\", b\"abcde\"]:\n",
" print(\"%08x %08x\" % (zlib.crc32(m), mycrc32(m)), \"|\", m)\n",
" assert zlib.crc32(m) == mycrc32(m)"
]
},
{
"cell_type": "markdown",
"id": "7935cfe6-721e-4623-b6ac-61ce26fdb77f",
"metadata": {},
"source": [
"# Test extend with zeros"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "efe1b9b7-71de-46ee-ac60-778d5a6992c3",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:13.752903Z",
"iopub.status.busy": "2024-05-07T11:56:13.751733Z",
"iopub.status.idle": "2024-05-07T11:56:13.781665Z",
"shell.execute_reply": "2024-05-07T11:56:13.771365Z",
"shell.execute_reply.started": "2024-05-07T11:56:13.752340Z"
}
},
"outputs": [],
"source": [
"import random, os\n",
"for _ in range(100):\n",
" a = random.randrange(1000)\n",
" b = random.randrange(1000)\n",
" s1 = os.urandom(a)\n",
" c = zlib.crc32(s1)\n",
" c1 = zlib.crc32(s1 + b\"\\x00\" * b)\n",
" c2 = extend_zeros(c, b*8)\n",
" assert c1 == c2"
]
},
{
"cell_type": "markdown",
"id": "f464e1d2-a140-45ff-a436-b2681f609f65",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:12:36.359239Z",
"iopub.status.busy": "2024-05-07T11:12:36.357896Z",
"iopub.status.idle": "2024-05-07T11:12:36.369277Z",
"shell.execute_reply": "2024-05-07T11:12:36.366900Z",
"shell.execute_reply.started": "2024-05-07T11:12:36.359096Z"
}
},
"source": [
"# Test extend with ones"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "33469630-2d7f-4f3e-b867-97cdd250fe6a",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:13.787432Z",
"iopub.status.busy": "2024-05-07T11:56:13.786199Z",
"iopub.status.idle": "2024-05-07T11:56:13.824654Z",
"shell.execute_reply": "2024-05-07T11:56:13.822975Z",
"shell.execute_reply.started": "2024-05-07T11:56:13.787261Z"
}
},
"outputs": [],
"source": [
"import random, os\n",
"for _ in range(100):\n",
" a = random.randrange(1000)\n",
" b = random.randrange(1000)\n",
" s1 = os.urandom(a)\n",
" c = zlib.crc32(s1)\n",
" c1 = zlib.crc32(s1 + b\"\\xff\" * b)\n",
" c2 = extend_ones(c, b*8)\n",
" assert c1 == c2 "
]
},
{
"cell_type": "markdown",
"id": "7350084c-6ea1-40e6-aae0-d6f92664a7d2",
"metadata": {},
"source": [
"# Test flip"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8f2c5a10-934b-4e1d-bc06-2097b8069ab5",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:13.830373Z",
"iopub.status.busy": "2024-05-07T11:56:13.828268Z",
"iopub.status.idle": "2024-05-07T11:56:13.854759Z",
"shell.execute_reply": "2024-05-07T11:56:13.852262Z",
"shell.execute_reply.started": "2024-05-07T11:56:13.830142Z"
}
},
"outputs": [],
"source": [
"import random, os\n",
"for _ in range(100):\n",
" a = random.randrange(1000)\n",
" s1 = os.urandom(a)\n",
" c = zlib.crc32(s1)\n",
" c1 = zlib.crc32(bytes(0xff^byte for byte in s1))\n",
" c2 = c ^ mul(MAGIC, power(0x4000_0000, 8*a) ^ 0x8000_0000) # MAGIC * (x^n + 1)\n",
" assert c1 == c2 "
]
},
{
"cell_type": "markdown",
"id": "ccf597cc-ec3f-4f7d-a37d-546dd4cf5884",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:51:10.326870Z",
"iopub.status.busy": "2024-05-07T11:51:10.326168Z",
"iopub.status.idle": "2024-05-07T11:51:10.334345Z",
"shell.execute_reply": "2024-05-07T11:51:10.332169Z",
"shell.execute_reply.started": "2024-05-07T11:51:10.326792Z"
}
},
"source": [
"# Timings"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "07141352-c856-4a2e-afb3-0fe0ba5e7d40",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:40.302266Z",
"iopub.status.busy": "2024-05-07T11:56:40.299987Z",
"iopub.status.idle": "2024-05-07T11:56:40.320996Z",
"shell.execute_reply": "2024-05-07T11:56:40.318714Z",
"shell.execute_reply.started": "2024-05-07T11:56:40.301942Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'3.10.13 (f1607341da97ff5a1e93430b6e8c4af0ad1aa019, Sep 28 2023, 05:41:26)\\n[PyPy 7.3.13 with GCC 10.2.1 20210130 (Red Hat 10.2.1-11)]'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sys\n",
"sys.version"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b525bd2b-fbb1-4ca1-89cd-4d0b50361419",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:13.860035Z",
"iopub.status.busy": "2024-05-07T11:56:13.858262Z",
"iopub.status.idle": "2024-05-07T11:56:16.169806Z",
"shell.execute_reply": "2024-05-07T11:56:16.167463Z",
"shell.execute_reply.started": "2024-05-07T11:56:13.859233Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.81 µs ± 62.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
"source": [
"# 0.5 GB = 2^29 bytes\n",
"%timeit extend_zeros(0xabcd1234, 2**32) "
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e2ac7d50-f0c6-484a-8467-4366af970bb5",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:16.176654Z",
"iopub.status.busy": "2024-05-07T11:56:16.175935Z",
"iopub.status.idle": "2024-05-07T11:56:18.996801Z",
"shell.execute_reply": "2024-05-07T11:56:18.995091Z",
"shell.execute_reply.started": "2024-05-07T11:56:16.176554Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.52 µs ± 603 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
"source": [
"# 0.5 GB = 2^29 bytes\n",
"%timeit extend_ones(0xabcd1234, 2**32)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "56dd220c-7689-4b6f-be61-5c07b9c336f9",
"metadata": {
"execution": {
"iopub.execute_input": "2024-05-07T11:56:19.001380Z",
"iopub.status.busy": "2024-05-07T11:56:18.999161Z",
"iopub.status.idle": "2024-05-07T11:56:24.205812Z",
"shell.execute_reply": "2024-05-07T11:56:24.204084Z",
"shell.execute_reply.started": "2024-05-07T11:56:19.001045Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"63.2 µs ± 3.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%timeit extend_zeros(0xabcd1234, 2**512)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "PyPy3",
"language": "python",
"name": "pypy3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment