Skip to content

Instantly share code, notes, and snippets.

@agoose77
Last active March 17, 2024 11:56
Show Gist options
  • Save agoose77/cd77e3b230b2132cbedff05c162eb222 to your computer and use it in GitHub Desktop.
Save agoose77/cd77e3b230b2132cbedff05c162eb222 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 26,
"id": "b306d5f8-9e84-4e15-9d6d-4f4dd5005565",
"metadata": {},
"outputs": [],
"source": [
"import awkward as ak\n",
"import numpy as np\n",
"import numba as nb"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "de2c695c-7ea7-4449-997a-bb35829814dc",
"metadata": {},
"outputs": [],
"source": [
"def nlargest_last(array, n):\n",
" def apply(layout, **kwargs):\n",
" if layout.is_list and layout.purelist_depth == 2:\n",
" layout = layout.to_ListOffsetArray64()\n",
" indices = np.empty(layout.length * n, dtype=np.int64)\n",
" nlargest_kernel(\n",
" layout.offsets.data,\n",
" indices,\n",
" layout.content.data,\n",
" n\n",
" )\n",
" return ak.contents.ListOffsetArray(\n",
" ak.index.Index64(np.arange(0, indices.size+1, n)),\n",
" ak.contents.NumpyArray(indices)\n",
" )\n",
" \n",
" return ak.transform(apply, array)\n",
" \n",
"\n",
"@nb.njit\n",
"def nlargest_kernel(offsets, indices, values, n):\n",
" for i in range(len(offsets) - 1):\n",
" j = offsets[i]\n",
" k = offsets[i+1]\n",
" size = k - j\n",
" p = size - n\n",
" j = np.argpartition(values[j:k], p)\n",
" indices[i*n:(i+1)*n] = j[p:]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "12935a38-d72d-40aa-9a84-e78f8059103c",
"metadata": {},
"outputs": [],
"source": [
"def nlargest_sorted_last(array, n):\n",
" def apply(layout, **kwargs):\n",
" if layout.is_list and layout.purelist_depth == 2:\n",
" layout = layout.to_ListOffsetArray64()\n",
" indices = np.empty(layout.length * n, dtype=np.int64)\n",
" nlargest_sorted_kernel(\n",
" layout.offsets.data,\n",
" indices,\n",
" layout.content.data,\n",
" n\n",
" )\n",
" return ak.contents.ListOffsetArray(\n",
" ak.index.Index64(np.arange(0, indices.size+1, n)),\n",
" ak.contents.NumpyArray(indices)\n",
" )\n",
" \n",
" return ak.transform(apply, array)\n",
" \n",
"\n",
"@nb.njit\n",
"def nlargest_sorted_kernel(offsets, indices, values, n):\n",
" for i in range(len(offsets) - 1):\n",
" j = offsets[i]\n",
" k = offsets[i+1]\n",
" size = k - j\n",
" p = size - n\n",
" j = np.argpartition(values[j:k], np.arange(p, p+n))\n",
" indices[i*n:(i+1)*n] = j[p:]"
]
},
{
"cell_type": "markdown",
"id": "0b7e9d46-aa89-4a28-8355-0fb00f231c07",
"metadata": {},
"source": [
"## For small jets"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "d4a27024-ee23-43f8-800c-f70f75c1fa72",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4998791"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_jets = np.random.randint(3, 8, size=1_000_000)\n",
"pt = ak.unflatten(\n",
" np.random.random(n_jets.sum()),\n",
" n_jets\n",
")\n",
"ak.count(pt)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "06848e7f-57c5-4739-8dbc-f4fbef3ceffc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"883 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"i_a = nlargest_last(pt, 2)\n",
"pt_sorted_a = ak.sort(\n",
" pt[i_a], axis=-1\n",
")\n",
"ordered_pt_a = ak.pad_none(\n",
" pt_sorted_a,\n",
" target=2,\n",
" clip=True\n",
")\n",
"\n",
"leading_jet = ordered_pt_a[:, 0]\n",
"subleading_jet = ordered_pt_a[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "6c70d499-f394-4d4e-9b90-17eea95cd26c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.3 s ± 30.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"i_b = nlargest_sorted_last(pt, 2)\n",
"ordered_pt_b = ak.pad_none(\n",
" pt[i_b],\n",
" target=2,\n",
" clip=True\n",
")\n",
"\n",
"leading_jet = ordered_pt_b[:, 0]\n",
"subleading_jet = ordered_pt_b[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "865c37f6-0454-4083-adca-4606b0e7600f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"229 ms ± 812 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"pt_sorted_c = ak.sort(\n",
" pt, axis=-1\n",
")[:, :2]\n",
"ordered_pt_c = ak.pad_none(\n",
" pt_sorted_c,\n",
" target=2,\n",
" clip=True\n",
")\n",
"\n",
"leading_jet = ordered_pt_c[:, 0]\n",
"subleading_jet = ordered_pt_c[:, 1]"
]
},
{
"cell_type": "markdown",
"id": "d6a0843f-4be5-4bc2-914b-83717edf890f",
"metadata": {},
"source": [
"## For large jets"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "063a5638-1987-43fd-8566-4188a816392d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5485005"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n_jets = np.random.randint(300, 800, size=1_000_0)\n",
"pt = ak.unflatten(\n",
" np.random.random(n_jets.sum()),\n",
" n_jets\n",
")\n",
"ak.count(pt)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "b06b2c93-a12e-4d35-bd80-2d727a7618c9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"84.5 ms ± 677 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"i_a = nlargest_last(pt, 2)\n",
"pt_sorted_a = ak.sort(\n",
" pt[i_a], axis=-1\n",
")\n",
"ordered_pt_a = ak.pad_none(\n",
" pt_sorted_a,\n",
" target=2,\n",
" clip=True\n",
")\n",
"\n",
"leading_jet = ordered_pt_a[:, 0]\n",
"subleading_jet = ordered_pt_a[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "8445253d-37d5-4f85-b57f-642560278669",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"89.6 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"i_b = nlargest_sorted_last(pt, 2)\n",
"ordered_pt_b = ak.pad_none(\n",
" pt[i_b],\n",
" target=2,\n",
" clip=True\n",
")\n",
"\n",
"leading_jet = ordered_pt_b[:, 0]\n",
"subleading_jet = ordered_pt_b[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "e1654dd5-2d1f-4905-8fd3-20686a747745",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"376 ms ± 3.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"pt_sorted_c = ak.sort(\n",
" pt, axis=-1\n",
")[:, :2]\n",
"ordered_pt_c = ak.pad_none(\n",
" pt_sorted_c,\n",
" target=2,\n",
" clip=True\n",
")\n",
"\n",
"leading_jet = ordered_pt_c[:, 0]\n",
"subleading_jet = ordered_pt_c[:, 1]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment