-
-
Save kfsone/f7f7de6d7c0216cf44cdf97b526a474e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "c8616412-8781-44cd-b163-2e9616c1dbda", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"station ids: 244792, min = 128000000, max = 3929141760, range = 3801141760\n" | |
] | |
} | |
], | |
"source": [ | |
"# Load the IDs from the database\n", | |
"import apsw ; db = apsw.Connection(\"./data/Tradedangerous.db\") ; sids = tuple(r[0] for r in db.execute(\"SELECT station_id FROM station\"))\n", | |
"print(f\"station ids: {len(sids)}, min = {min(sids)}, max = {max(sids)}, range = {max(sids) - min(sids)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "b041f0ef-b4aa-4808-971d-e3458cfdd992", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"exploring (30599,524288)\n" | |
] | |
} | |
], | |
"source": [ | |
"# calculate some plausible numbers for reducing these into a worst-case 8 stations per bucket\n", | |
"lower = int(len(sids) / 8) # anything lower can't possibly bottom out at 8-per\n", | |
"for i in range(8, 64): # calculate a power-of-2 at least 2x the count\n", | |
" if (1 << i) >= 2*len(sids):\n", | |
" upper = 1 << i\n", | |
" break\n", | |
"print(f\"exploring ({lower},{upper})\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "7f8ea17b-3c1e-47dc-9f53-8e21e9728273", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# method that hashes all the ids we have and returns the most- and least- populated buckets, and count of empties\n", | |
"from collections import Counter\n", | |
"def analyze(seed: int):\n", | |
" buckets = Counter()\n", | |
" for sid in sids:\n", | |
" bucket = sid % seed\n", | |
" buckets[bucket] += 1\n", | |
" # most_common returns (value, count) in descending order of occupancy\n", | |
" stats = buckets.most_common()\n", | |
" most, least = stats[0], stats[-1]\n", | |
" empties = seed - len(stats)\n", | |
" max_count = 1\n", | |
" while max_count < len(sids) and stats[max_count][1] == most[1]:\n", | |
" max_count += 1\n", | |
" min_count = 1\n", | |
" while min_count > -len(sids) and stats[min_count][1] == least[1]:\n", | |
" min_count += 1\n", | |
" average_per_bucket = len(sids) / (seed - empties)\n", | |
" average_potential = len(sids) / seed\n", | |
" return most, max_count, least, min_count, empties, average_per_bucket, average_potential\n", | |
"\n", | |
"def describe(seed, most, maxc, least, minc, empties, avg, pot):\n", | |
" print(f\"{seed}: {most[1]}/bucket {maxc}x <-> {least[1]}/bucket {minc}x; {empties} empty. avg pop: {avg:.3f}, avg potential: {pot:.3f}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "7a8492ce-898a-44fb-bebc-a5cc607b6cbb", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"example:\n", | |
"244792: 18/bucket 2x <-> 1/bucket 1x; 213593 empty. avg pop: 7.846, avg potential: 1.000\n" | |
] | |
} | |
], | |
"source": [ | |
"print(\"example:\")\n", | |
"most, maxc, least, minc, empties, avg, pot = analyze(len(sids))\n", | |
"describe(len(sids), most, maxc, least, minc, empties, avg, pot)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "4fdd0a59-837b-497d-9636-f2fd1dbde55a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "611f584eaa36481f9a86e7caf4c9a446", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"Output()" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">30599: 18/bucket 2x <-> 1/bucket 1x; 0 empty. avg pop: 8.000, avg potential: 8.000\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"30599: 18/bucket 2x <-> 1/bucket 1x; 0 empty. avg pop: 8.000, avg potential: 8.000\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">30601: 17/bucket 5x <-> 1/bucket 1x; 0 empty. avg pop: 7.999, avg potential: 7.999\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"30601: 17/bucket 5x <-> 1/bucket 1x; 0 empty. avg pop: 7.999, avg potential: 7.999\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">30645: 16/bucket 14x <-> 1/bucket 1x; 0 empty. avg pop: 7.988, avg potential: 7.988\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"30645: 16/bucket 14x <-> 1/bucket 1x; 0 empty. avg pop: 7.988, avg potential: 7.988\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">32191: 15/bucket 38x <-> 1/bucket 1x; 0 empty. avg pop: 7.604, avg potential: 7.604\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"32191: 15/bucket 38x <-> 1/bucket 1x; 0 empty. avg pop: 7.604, avg potential: 7.604\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">34665: 14/bucket 24x <-> 1/bucket 1x; 0 empty. avg pop: 7.062, avg potential: 7.062\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"34665: 14/bucket 24x <-> 1/bucket 1x; 0 empty. avg pop: 7.062, avg potential: 7.062\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">41049: 13/bucket 20x <-> 1/bucket 1x; 10 empty. avg pop: 5.965, avg potential: 5.963\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"41049: 13/bucket 20x <-> 1/bucket 1x; 10 empty. avg pop: 5.965, avg potential: 5.963\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n" | |
], | |
"text/plain": [] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n", | |
"</pre>\n" | |
], | |
"text/plain": [ | |
"\n" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[1;32mIn[5], line 13\u001b[0m\n\u001b[0;32m 10\u001b[0m desc \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSearching...\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m seed \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(lower, upper\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m):\n\u001b[1;32m---> 13\u001b[0m most, maxc, least, minc, empties, avg, pot \u001b[38;5;241m=\u001b[39m \u001b[43manalyze\u001b[49m\u001b[43m(\u001b[49m\u001b[43mseed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m most[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m<\u001b[39m most_compact:\n\u001b[0;32m 15\u001b[0m describe(seed, most, maxc, least, minc, empties, avg, pot)\n", | |
"Cell \u001b[1;32mIn[3], line 7\u001b[0m, in \u001b[0;36manalyze\u001b[1;34m(seed)\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m sid \u001b[38;5;129;01min\u001b[39;00m sids:\n\u001b[0;32m 6\u001b[0m bucket \u001b[38;5;241m=\u001b[39m sid \u001b[38;5;241m%\u001b[39m seed\n\u001b[1;32m----> 7\u001b[0m buckets[bucket] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 8\u001b[0m \u001b[38;5;66;03m# most_common returns (value, count) in descending order of occupancy\u001b[39;00m\n\u001b[0;32m 9\u001b[0m stats \u001b[38;5;241m=\u001b[39m buckets\u001b[38;5;241m.\u001b[39mmost_common()\n", | |
"\u001b[1;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"from rich import print as rprint\n", | |
"from rich.console import Console\n", | |
"from rich import progress as rp\n", | |
"\n", | |
"with rp.Progress(rp.SpinnerColumn(), rp.TextColumn(\"\"), rp.BarColumn(), rp.MofNCompleteColumn(), rp.TimeRemainingColumn(), transient=False, auto_refresh=True) as prog:\n", | |
" task = prog.add_task(\"Analyzing...\", total=upper, value=lower, auto_start=True)\n", | |
"\n", | |
" # start describing seed/distributions below this threshold\n", | |
" most_compact = 128\n", | |
" desc = \"Searching...\"\n", | |
" \n", | |
" for seed in range(lower, upper+1):\n", | |
" most, maxc, least, minc, empties, avg, pot = analyze(seed)\n", | |
" if most[1] < most_compact:\n", | |
" describe(seed, most, maxc, least, minc, empties, avg, pot)\n", | |
" most_compact = most[1]\n", | |
" prog.update(task, description=desc, completed=seed)\n", | |
" print(\"finished\")\n", | |
" prog.refresh()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e3c1d1a2-dd0f-4e9a-b9b8-9695fd71b6d9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment