Skip to content

Instantly share code, notes, and snippets.

@dsaint31x
Last active March 21, 2024 05:58
Show Gist options
  • Save dsaint31x/eb7a1fcc729ba3f349d7259002318976 to your computer and use it in GitHub Desktop.
Save dsaint31x/eb7a1fcc729ba3f349d7259002318976 to your computer and use it in GitHub Desktop.
fancyindexing.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm",
"gpuType": "A100",
"authorship_tag": "ABX9TyOYPb+DGOQtkPgiYg9h7WxB",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/dsaint31x/eb7a1fcc729ba3f349d7259002318976/fancyindexing.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"id": "spIczoqfFcHR"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n",
"import tensorflow as tf\n"
]
},
{
"cell_type": "markdown",
"source": [
"# 1D Tensor에서의 Fancy Indexing"
],
"metadata": {
"id": "IYmv6cNG6amF"
}
},
{
"cell_type": "code",
"source": [
"x = np.array([10.,20.,30.,40.,50.])\n",
"x_torch = torch.tensor([10.,20.,30.,40.,50.])\n",
"x_tf = tf.constant([10.,20.,30.,40.,50.])\n",
"\n",
"f_indices = [3, 4, 1]\n",
"print(x[f_indices])\n",
"print(x_torch[f_indices])\n",
"print(tf.gather(x_tf,f_indices)) #1D 에선 gather, 2D 이상시 gather_nd\n",
"print(tf.gather_nd(x_tf, [ i for i in zip(f_indices,)])) # 굳이 쓴다면, 다음과 같이."
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ACqYmcl_FgGg",
"outputId": "e9db7728-4cf7-4212-b08e-c3f6a0491a44"
},
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[40. 50. 20.]\n",
"tensor([40., 50., 20.])\n",
"tf.Tensor([40. 50. 20.], shape=(3,), dtype=float32)\n",
"tf.Tensor([40. 50. 20.], shape=(3,), dtype=float32)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# 2D Tensor에서의 Fancy Indexing"
],
"metadata": {
"id": "-8utlOgB6fAU"
}
},
{
"cell_type": "code",
"source": [
"x = np.arange(5*5).reshape(5,5) * 10\n",
"x_torch = torch.arange(5*5).view(size=(5,5)) * 10\n",
"x_tf = tf.constant(x)\n",
"\n",
"indices_0 = [0, 1, 2]\n",
"indices_1 = [0, 1, 2]\n",
"\n",
"\n",
"b = x[indices_0, indices_1]\n",
"print(b.shape)\n",
"print(b)\n",
"print('----------')\n",
"c = x_torch[indices_0, indices_1]\n",
"print(c.shape)\n",
"print(c)\n",
"print('----------')\n",
"d = tf.gather_nd(x_tf, [ i for i in zip(indices_0, indices_1)])\n",
"print(d.shape)\n",
"print(d)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "qpNxntnl3e-w",
"outputId": "27db4b3d-c0e2-4086-96ea-7b5bd0a00aa5"
},
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(3,)\n",
"[ 0 60 120]\n",
"----------\n",
"torch.Size([3])\n",
"tensor([ 0, 60, 120])\n",
"----------\n",
"(3,)\n",
"tf.Tensor([ 0 60 120], shape=(3,), dtype=int64)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# 3D Tensor Example"
],
"metadata": {
"id": "UOkz8TTs7P5P"
}
},
{
"cell_type": "code",
"source": [
"x = np.arange(5*5*5).reshape(5,5,5) * 10\n",
"x_torch = torch.arange(5*5*5).view(size=(5,5,5)) * 10\n",
"x_tf = tf.constant(x)\n",
"\n",
"indices_0 = [0, 1]\n",
"indices_1 = [1, 2]\n",
"indices_2 = [2, 0]\n",
"\n",
"b = x[indices_0, indices_1, indices_2]\n",
"print(b.shape)\n",
"print(b)\n",
"print('----------')\n",
"c = x_torch[indices_0, indices_1, indices_2]\n",
"print(c.shape)\n",
"print(c)\n",
"print('----------')\n",
"d = tf.gather_nd(x_tf, [ i for i in zip(indices_0, indices_1, indices_2)]) # multi-dim 에선 gather_nd 임.\n",
"print(d.shape)\n",
"print(d)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VdnF6uJAFhf3",
"outputId": "729c4dcd-b415-4e90-8c90-28e1c615de3a"
},
"execution_count": 30,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(2,)\n",
"[ 70 350]\n",
"----------\n",
"torch.Size([2])\n",
"tensor([ 70, 350])\n",
"----------\n",
"(2,)\n",
"tf.Tensor([ 70 350], shape=(2,), dtype=int64)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Boolean Mask"
],
"metadata": {
"id": "wGILBv1uADTc"
}
},
{
"cell_type": "code",
"source": [
"x = np.arange(3*3*3).reshape(3,3,3) * 10\n",
"\n",
"b = x <= 270/2\n",
"print(b.shape)\n",
"print(b)\n",
"print('----------')\n",
"print(x[b])\n",
"print('----------')\n",
"print(x[x<=270/2])\n",
"\n",
"b1 = b | (x >= 200)\n",
"print('----------')\n",
"print(b1)\n",
"print('----------')\n",
"print(x[b1])\n",
"print('----------')\n",
"print(x[ (x<=270/2) | (x>=200)])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HbPhJsuW7aPy",
"outputId": "1ec745a3-e531-4635-e6fe-28f47b5d656f"
},
"execution_count": 41,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(3, 3, 3)\n",
"[[[ True True True]\n",
" [ True True True]\n",
" [ True True True]]\n",
"\n",
" [[ True True True]\n",
" [ True True False]\n",
" [False False False]]\n",
"\n",
" [[False False False]\n",
" [False False False]\n",
" [False False False]]]\n",
"----------\n",
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130]\n",
"----------\n",
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130]\n",
"----------\n",
"[[[ True True True]\n",
" [ True True True]\n",
" [ True True True]]\n",
"\n",
" [[ True True True]\n",
" [ True True False]\n",
" [False False False]]\n",
"\n",
" [[False False True]\n",
" [ True True True]\n",
" [ True True True]]]\n",
"----------\n",
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130 200 210 220 230\n",
" 240 250 260]\n",
"----------\n",
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130 200 210 220 230\n",
" 240 250 260]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"x_torch = torch.arange(3*3*3).view(size=(3,3,3)) * 10\n",
"\n",
"print(x_torch[ (x_torch<=270/2) | (x_torch>=200)])\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aooVbv_HAMW4",
"outputId": "b3f44c27-4662-411d-d33f-5d1b7142ad4e"
},
"execution_count": 43,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130,\n",
" 200, 210, 220, 230, 240, 250, 260])\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"x_tf = tf.constant(x)\n",
"\n",
"print(x_tf[ (x_tf<= tf.cast(270/2, tf.int64)) | (x_tf>=200)])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hYF0pR0VAptn",
"outputId": "6593385d-0ce5-4774-9e1e-f57af249abe1"
},
"execution_count": 48,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"tf.Tensor(\n",
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130 200 210 220 230\n",
" 240 250 260], shape=(21,), dtype=int64)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# combined indexing\n",
"\n",
"기존의 scalar indexing, slicing, fancy indexing, boolean mask 등을 서로 조합하여 사용하는 것.\n",
"\n",
"numpy에선 효용도가 있으나,\n",
"pytorch 나 tensorflow 에서는 사용이 쉽지 않음 (고차원에선 처리가 쉽지 않음)"
],
"metadata": {
"id": "Mj-WONtDa2G2"
}
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"x = np.array([0,1,2,3,4,5,6,7,8]).reshape((3,3))\n",
"print(x.shape)\n",
"\n",
"mask = np.array([True, False, True])\n",
"cidx = np.array([2,0,1])\n",
"print(mask.shape)\n",
"print(cidx.shape)\n",
"\n",
"print('--------')\n",
"print(x)\n",
"print('--------')\n",
"print(x[mask,cidx[:,np.newaxis]]) # broadcasting 으로 fancyindexing 이 이루어짐."
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WsfkGlroH7dy",
"outputId": "a3d34c91-7da7-4f89-9c9b-86425e0e013b"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(3, 3)\n",
"(3,)\n",
"(3,)\n",
"--------\n",
"[[0 1 2]\n",
" [3 4 5]\n",
" [6 7 8]]\n",
"--------\n",
"[[2 8]\n",
" [0 6]\n",
" [1 7]]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"\n",
"x_torch = torch.tensor([0,1,2,3,4,5,6,7,8]).reshape((3,3))\n",
"if torch.cuda.is_available():\n",
" x_torch = x_torch.to(\"cuda\")\n",
"\n",
"x_torch[mask,cidx[:,np.newaxis]]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yO5EqGdfdz-y",
"outputId": "75544435-1418-4b11-f0ae-cc230eb2acf8"
},
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"tensor([[2, 8],\n",
" [0, 6],\n",
" [1, 7]], device='cuda:0')"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"a = np.array([1,0,1])\n",
"b = cidx[:,np.newaxis]\n",
"a*b"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aFuP8G83cYVw",
"outputId": "caa79994-9a1b-45d3-b330-de4666ac3f3a"
},
"execution_count": 53,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[2, 0, 2],\n",
" [0, 0, 0],\n",
" [1, 0, 1]])"
]
},
"metadata": {},
"execution_count": 53
}
]
},
{
"cell_type": "code",
"source": [
"x = np.array([0,1,2,3,4,5,6,7,8]).reshape((3,3))\n",
"\n",
"# boolean mask 대신, 일반적인 수치로도 가능.\n",
"r = x[[0,2],cidx[:,np.newaxis]] # combined indexing의 경우, 내부에서 broadcasting 룰이 적용됨.\n",
"\n",
"print(x)\n",
"print('--------')\n",
"print(r)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KXmgc0plJpW5",
"outputId": "c139fb8d-58ca-4e86-f763-e59c7690177b"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[0 1 2]\n",
" [3 4 5]\n",
" [6 7 8]]\n",
"--------\n",
"[[2 8]\n",
" [0 6]\n",
" [1 7]]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"x = np.array([0,1,2,3,4,5,6,7]).reshape(2,2,2)\n",
"t = np.array([0,1])\n",
"r = x[[False,True],t[np.newaxis,:]]\n",
"print(x.shape)\n",
"print(x)\n",
"print('--------')\n",
"print(r.shape)\n",
"print(r)\n",
"print('--------')\n",
"print(t[np.newaxis, :].shape)\n",
"print(t[np.newaxis, :])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1dhpJ8JDrWd7",
"outputId": "2cf535ee-d061-4ffe-eebb-bb3453488b4b"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(2, 2, 2)\n",
"[[[0 1]\n",
" [2 3]]\n",
"\n",
" [[4 5]\n",
" [6 7]]]\n",
"--------\n",
"(1, 2, 2)\n",
"[[[4 5]\n",
" [6 7]]]\n",
"--------\n",
"(1, 2)\n",
"[[0 1]]\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# fancy indexing을 활용한 처리."
],
"metadata": {
"id": "rSMSnHlthkLS"
}
},
{
"cell_type": "code",
"source": [
"x = np.arange(10).reshape(1,10)\n",
"print('before:',x)\n",
"r_idx = np.array([0])\n",
"c_idx = np.array([9, 1, 3, 9]) # 중복되어도 더해지진 않음.\n",
"x[r_idx,c_idx] = x[r_idx,c_idx] +10\n",
"print('after :',x)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GxyD7prVKFYZ",
"outputId": "5ccfb704-2859-4743-ace5-426a040da003"
},
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"before: [[0 1 2 3 4 5 6 7 8 9]]\n",
"after : [[ 0 11 2 13 4 5 6 7 8 19]]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"x = np.arange(10)\n",
"print('before:',x)\n",
"# Define the index array locally: no earlier cell binds `i`, so a fresh\n",
"# Restart & Run All would otherwise stop here with a NameError.\n",
"i = np.array([9, 1, 3, 9])  # index 9 appears twice on purpose\n",
"np.add.at(x,i,10) # unbuffered in-place add: repeated indices accumulate, so x[9] gains 10 twice\n",
"print('after :',x)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "W_V4k-67LH-O",
"outputId": "1ec8084b-2d70-4fee-d3b5-1e08c0a23f00"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"before: [0 1 2 3 4 5 6 7 8 9]\n",
"after : [ 0 11 2 13 4 5 6 7 8 29]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"x_torch = torch.arange(10)\n",
"print(x_torch.dtype)\n",
"print('before:',x_torch)\n",
"i = torch.tensor([9, 1, 3, 9]) # 중복되어도 더해지진 않음.\n",
"x_torch[i] = x_torch[i] +10\n",
"print('after :',x_torch)"
],
"metadata": {
"id": "qVbjvaVbMVcl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "0a094195-a952-4cfb-efe6-d810571fb4c1"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"torch.int64\n",
"before: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
"after : tensor([ 0, 11, 2, 13, 4, 5, 6, 7, 8, 19])\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"\n",
"rng = np.random.default_rng(1)\n",
"x = rng.random((5,2))\n"
],
"metadata": {
"id": "m1hBVUxHoF9B"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Insert a length-1 axis at position 1: (5, 2) -> (5, 1, 2).\n",
"y0 = x[:, np.newaxis]\n",
"y0.shape"
],
"metadata": {
"id": "awkm5pHJoXGY",
"outputId": "33aa8a05-ce5f-44d9-8add-dfcc147354d8",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(5, 1, 2)"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "uP6xZxUEoqF0"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment