Last active
March 21, 2024 05:58
-
-
Save dsaint31x/eb7a1fcc729ba3f349d7259002318976 to your computer and use it in GitHub Desktop.
fancyindexing.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"machine_shape": "hm", | |
"gpuType": "A100", | |
"authorship_tag": "ABX9TyOYPb+DGOQtkPgiYg9h7WxB", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/dsaint31x/eb7a1fcc729ba3f349d7259002318976/fancyindexing.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"id": "spIczoqfFcHR" | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import torch\n", | |
"import tensorflow as tf\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# 1D Tensor에서의 Fancy Indexing" | |
], | |
"metadata": { | |
"id": "IYmv6cNG6amF" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x = np.array([10.,20.,30.,40.,50.])\n", | |
"x_torch = torch.tensor([10.,20.,30.,40.,50.])\n", | |
"x_tf = tf.constant([10.,20.,30.,40.,50.])\n", | |
"\n", | |
"f_indices = [3, 4, 1]\n", | |
"print(x[f_indices])\n", | |
"print(x_torch[f_indices])\n", | |
"print(tf.gather(x_tf,f_indices)) #1D 에선 gather, 2D 이상시 gather_nd\n", | |
"print(tf.gather_nd(x_tf, [ i for i in zip(f_indices,)])) # 굳이 쓴다면, 다음과 같이." | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ACqYmcl_FgGg", | |
"outputId": "e9db7728-4cf7-4212-b08e-c3f6a0491a44" | |
}, | |
"execution_count": 26, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[40. 50. 20.]\n", | |
"tensor([40., 50., 20.])\n", | |
"tf.Tensor([40. 50. 20.], shape=(3,), dtype=float32)\n", | |
"tf.Tensor([40. 50. 20.], shape=(3,), dtype=float32)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# 2D Tensor에서의 Fancy Indexing" | |
], | |
"metadata": { | |
"id": "-8utlOgB6fAU" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x = np.arange(5*5).reshape(5,5) * 10\n", | |
"x_torch = torch.arange(5*5).view(size=(5,5)) * 10\n", | |
"x_tf = tf.constant(x)\n", | |
"\n", | |
"indices_0 = [0, 1, 2]\n", | |
"indices_1 = [0, 1, 2]\n", | |
"\n", | |
"\n", | |
"b = x[indices_0, indices_1]\n", | |
"print(b.shape)\n", | |
"print(b)\n", | |
"print('----------')\n", | |
"c = x_torch[indices_0, indices_1]\n", | |
"print(c.shape)\n", | |
"print(c)\n", | |
"print('----------')\n", | |
"d = tf.gather_nd(x_tf, [ i for i in zip(indices_0, indices_1)])\n", | |
"print(d.shape)\n", | |
"print(d)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "qpNxntnl3e-w", | |
"outputId": "27db4b3d-c0e2-4086-96ea-7b5bd0a00aa5" | |
}, | |
"execution_count": 27, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(3,)\n", | |
"[ 0 60 120]\n", | |
"----------\n", | |
"torch.Size([3])\n", | |
"tensor([ 0, 60, 120])\n", | |
"----------\n", | |
"(3,)\n", | |
"tf.Tensor([ 0 60 120], shape=(3,), dtype=int64)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# 3D Tensor Example" | |
], | |
"metadata": { | |
"id": "UOkz8TTs7P5P" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x = np.arange(5*5*5).reshape(5,5,5) * 10\n", | |
"x_torch = torch.arange(5*5*5).view(size=(5,5,5)) * 10\n", | |
"x_tf = tf.constant(x)\n", | |
"\n", | |
"indices_0 = [0, 1]\n", | |
"indices_1 = [1, 2]\n", | |
"indices_2 = [2, 0]\n", | |
"\n", | |
"b = x[indices_0, indices_1, indices_2]\n", | |
"print(b.shape)\n", | |
"print(b)\n", | |
"print('----------')\n", | |
"c = x_torch[indices_0, indices_1, indices_2]\n", | |
"print(c.shape)\n", | |
"print(c)\n", | |
"print('----------')\n", | |
"d = tf.gather_nd(x_tf, [ i for i in zip(indices_0, indices_1, indices_2)]) # multi-dim 에선 gather_nd 임.\n", | |
"print(d.shape)\n", | |
"print(d)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "VdnF6uJAFhf3", | |
"outputId": "729c4dcd-b415-4e90-8c90-28e1c615de3a" | |
}, | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(2,)\n", | |
"[ 70 350]\n", | |
"----------\n", | |
"torch.Size([2])\n", | |
"tensor([ 70, 350])\n", | |
"----------\n", | |
"(2,)\n", | |
"tf.Tensor([ 70 350], shape=(2,), dtype=int64)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Boolean Mask" | |
], | |
"metadata": { | |
"id": "wGILBv1uADTc" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x = np.arange(3*3*3).reshape(3,3,3) * 10\n", | |
"\n", | |
"b = x <= 270/2\n", | |
"print(b.shape)\n", | |
"print(b)\n", | |
"print('----------')\n", | |
"print(x[b])\n", | |
"print('----------')\n", | |
"print(x[x<=270/2])\n", | |
"\n", | |
"b1 = b | (x >= 200)\n", | |
"print('----------')\n", | |
"print(b1)\n", | |
"print('----------')\n", | |
"print(x[b1])\n", | |
"print('----------')\n", | |
"print(x[ (x<=270/2) | (x>=200)])" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "HbPhJsuW7aPy", | |
"outputId": "1ec745a3-e531-4635-e6fe-28f47b5d656f" | |
}, | |
"execution_count": 41, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(3, 3, 3)\n", | |
"[[[ True True True]\n", | |
" [ True True True]\n", | |
" [ True True True]]\n", | |
"\n", | |
" [[ True True True]\n", | |
" [ True True False]\n", | |
" [False False False]]\n", | |
"\n", | |
" [[False False False]\n", | |
" [False False False]\n", | |
" [False False False]]]\n", | |
"----------\n", | |
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130]\n", | |
"----------\n", | |
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130]\n", | |
"----------\n", | |
"[[[ True True True]\n", | |
" [ True True True]\n", | |
" [ True True True]]\n", | |
"\n", | |
" [[ True True True]\n", | |
" [ True True False]\n", | |
" [False False False]]\n", | |
"\n", | |
" [[False False True]\n", | |
" [ True True True]\n", | |
" [ True True True]]]\n", | |
"----------\n", | |
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130 200 210 220 230\n", | |
" 240 250 260]\n", | |
"----------\n", | |
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130 200 210 220 230\n", | |
" 240 250 260]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x_torch = torch.arange(3*3*3).view(size=(3,3,3)) * 10\n", | |
"\n", | |
"print(x_torch[ (x_torch<=270/2) | (x_torch>=200)])\n", | |
"\n" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "aooVbv_HAMW4", | |
"outputId": "b3f44c27-4662-411d-d33f-5d1b7142ad4e" | |
}, | |
"execution_count": 43, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130,\n", | |
" 200, 210, 220, 230, 240, 250, 260])\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x_tf = tf.constant(x)\n", | |
"\n", | |
"print(x_tf[ (x_tf<= tf.cast(270/2, tf.int64)) | (x_tf>=200)])" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "hYF0pR0VAptn", | |
"outputId": "6593385d-0ce5-4774-9e1e-f57af249abe1" | |
}, | |
"execution_count": 48, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"tf.Tensor(\n", | |
"[ 0 10 20 30 40 50 60 70 80 90 100 110 120 130 200 210 220 230\n", | |
" 240 250 260], shape=(21,), dtype=int64)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# combined indexing\n", | |
"\n", | |
"기존의 scalar indexing, slicing, fancy indexing, boolean mask 등을 서로 조합하여 사용하는 것.\n", | |
"\n", | |
"numpy에선 효용도가 있으나,\n", | |
"pytorch 나 tensorflow 에서는 사용이 쉽지 않음 (고차원에선 처리가 쉽지 않음)" | |
], | |
"metadata": { | |
"id": "Mj-WONtDa2G2" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"x = np.array([0,1,2,3,4,5,6,7,8]).reshape((3,3))\n", | |
"print(x.shape)\n", | |
"\n", | |
"mask = np.array([True, False, True])\n", | |
"cidx = np.array([2,0,1])\n", | |
"print(mask.shape)\n", | |
"print(cidx.shape)\n", | |
"\n", | |
"print('--------')\n", | |
"print(x)\n", | |
"print('--------')\n", | |
"print(x[mask,cidx[:,np.newaxis]]) # broadcasting 으로 fancyindexing 이 이루어짐." | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "WsfkGlroH7dy", | |
"outputId": "a3d34c91-7da7-4f89-9c9b-86425e0e013b" | |
}, | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(3, 3)\n", | |
"(3,)\n", | |
"(3,)\n", | |
"--------\n", | |
"[[0 1 2]\n", | |
" [3 4 5]\n", | |
" [6 7 8]]\n", | |
"--------\n", | |
"[[2 8]\n", | |
" [0 6]\n", | |
" [1 7]]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import torch\n", | |
"\n", | |
"x_torch = torch.tensor([0,1,2,3,4,5,6,7,8]).reshape((3,3))\n", | |
"if torch.cuda.is_available():\n", | |
" x_torch = x_torch.to(\"cuda\")\n", | |
"\n", | |
"x_torch[mask,cidx[:,np.newaxis]]" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "yO5EqGdfdz-y", | |
"outputId": "75544435-1418-4b11-f0ae-cc230eb2acf8" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"tensor([[2, 8],\n", | |
" [0, 6],\n", | |
" [1, 7]], device='cuda:0')" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"a = np.array([1,0,1])\n", | |
"b = cidx[:,np.newaxis]\n", | |
"a*b" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "aFuP8G83cYVw", | |
"outputId": "caa79994-9a1b-45d3-b330-de4666ac3f3a" | |
}, | |
"execution_count": 53, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([[2, 0, 2],\n", | |
" [0, 0, 0],\n", | |
" [1, 0, 1]])" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 53 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x = np.array([0,1,2,3,4,5,6,7,8]).reshape((3,3))\n", | |
"\n", | |
"# boolean mask 대신, 일반적인 수치로도 가능.\n", | |
"r = x[[0,2],cidx[:,np.newaxis]] # combined indexing의 경우, 내부에서 broadcasting 룰이 적용됨.\n", | |
"\n", | |
"print(x)\n", | |
"print('--------')\n", | |
"print(r)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "KXmgc0plJpW5", | |
"outputId": "c139fb8d-58ca-4e86-f763-e59c7690177b" | |
}, | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[[0 1 2]\n", | |
" [3 4 5]\n", | |
" [6 7 8]]\n", | |
"--------\n", | |
"[[2 8]\n", | |
" [0 6]\n", | |
" [1 7]]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"x = np.array([0,1,2,3,4,5,6,7]).reshape(2,2,2)\n", | |
"t = np.array([0,1])\n", | |
"r = x[[False,True],t[np.newaxis,:]]\n", | |
"print(x.shape)\n", | |
"print(x)\n", | |
"print('--------')\n", | |
"print(r.shape)\n", | |
"print(r)\n", | |
"print('--------')\n", | |
"print(t[np.newaxis, :].shape)\n", | |
"print(t[np.newaxis, :])" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "1dhpJ8JDrWd7", | |
"outputId": "2cf535ee-d061-4ffe-eebb-bb3453488b4b" | |
}, | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"(2, 2, 2)\n", | |
"[[[0 1]\n", | |
" [2 3]]\n", | |
"\n", | |
" [[4 5]\n", | |
" [6 7]]]\n", | |
"--------\n", | |
"(1, 2, 2)\n", | |
"[[[4 5]\n", | |
" [6 7]]]\n", | |
"--------\n", | |
"(1, 2)\n", | |
"[[0 1]]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# fancy indexing을 활용한 처리." | |
], | |
"metadata": { | |
"id": "rSMSnHlthkLS" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x = np.arange(10).reshape(1,10)\n", | |
"print('before:',x)\n", | |
"r_idx = np.array([0])\n", | |
"c_idx = np.array([9, 1, 3, 9]) # 중복되어도 더해지진 않음.\n", | |
"x[r_idx,c_idx] = x[r_idx,c_idx] +10\n", | |
"print('after :',x)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "GxyD7prVKFYZ", | |
"outputId": "5ccfb704-2859-4743-ace5-426a040da003" | |
}, | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"before: [[0 1 2 3 4 5 6 7 8 9]]\n", | |
"after : [[ 0 11 2 13 4 5 6 7 8 19]]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Accumulate +10 at the given positions of a 1-D array using np.add.at.\n", | |
"x = np.arange(10)\n", | |
"print('before:',x)\n", | |
"# Define the index array inside this cell so it runs on a fresh kernel\n", | |
"# (previously `i` was only available as leftover kernel state).\n", | |
"i = np.array([9, 1, 3, 9])\n", | |
"np.add.at(x,i,10) # unbuffered add: a repeated index (9 here) is incremented once per occurrence.\n", | |
"print('after :',x)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "W_V4k-67LH-O", | |
"outputId": "1ec8084b-2d70-4fee-d3b5-1e08c0a23f00" | |
}, | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"before: [0 1 2 3 4 5 6 7 8 9]\n", | |
"after : [ 0 11 2 13 4 5 6 7 8 29]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"x_torch = torch.arange(10)\n", | |
"print(x_torch.dtype)\n", | |
"print('before:',x_torch)\n", | |
"i = torch.tensor([9, 1, 3, 9]) # 중복되어도 더해지진 않음.\n", | |
"x_torch[i] = x_torch[i] +10\n", | |
"print('after :',x_torch)" | |
], | |
"metadata": { | |
"id": "qVbjvaVbMVcl", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "0a094195-a952-4cfb-efe6-d810571fb4c1" | |
}, | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"torch.int64\n", | |
"before: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n", | |
"after : tensor([ 0, 11, 2, 13, 4, 5, 6, 7, 8, 19])\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"rng = np.random.default_rng(1)\n", | |
"x = rng.random((5,2))\n" | |
], | |
"metadata": { | |
"id": "m1hBVUxHoF9B" | |
}, | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Insert a new length-1 axis after the first axis of x, then display the resulting shape.\n", | |
"y0 = x[:, np.newaxis]\n", | |
"y0.shape" | |
], | |
"metadata": { | |
"id": "awkm5pHJoXGY", | |
"outputId": "33aa8a05-ce5f-44d9-8add-dfcc147354d8", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(5, 1, 2)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "uP6xZxUEoqF0" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment