Skip to content

Instantly share code, notes, and snippets.

@tysun
Created January 12, 2023 04:17
Show Gist options
  • Save tysun/651cac6bab77f6eb3d842ad3ad5e983b to your computer and use it in GitHub Desktop.
Save tysun/651cac6bab77f6eb3d842ad3ad5e983b to your computer and use it in GitHub Desktop.
colab-cuda-cmds.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/tysun/651cac6bab77f6eb3d842ad3ad5e983b/colab-cuda-cmds.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6085f4cb",
"metadata": {
"id": "6085f4cb",
"outputId": "2787f221-a359-4f5b-80b9-1e5021694d9e",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2021 NVIDIA Corporation\n",
"Built on Sun_Feb_14_21:12:58_PST_2021\n",
"Cuda compilation tools, release 11.2, V11.2.152\n",
"Build cuda_11.2.r11.2/compiler.29618528_0\n"
]
}
],
"source": [
"!nvcc --version"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25d51772",
"metadata": {
"id": "25d51772",
"outputId": "798abb2e-e22e-428e-e7f0-db31ae4ba304",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git\n",
" Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-u8jijzz9\n",
" Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-u8jijzz9\n",
" Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit aac710a35f52bb78ab34d2e52517237941399eff\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Building wheels for collected packages: NVCCPlugin\n",
" Building wheel for NVCCPlugin (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4304 sha256=208fd38f45a2dc6b7326aacb332d71455deafedb9bed2b117df5552d8aa29c0c\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-k6lpwjy2/wheels/f3/08/cc/e2b5b0e1c92df07dbb50a6f024a68ce090f5e7b2316b41756d\n",
"Successfully built NVCCPlugin\n",
"Installing collected packages: NVCCPlugin\n",
"Successfully installed NVCCPlugin-0.0.2\n"
]
}
],
"source": [
"!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git"
]
},
{
"cell_type": "code",
"source": [
"%load_ext nvcc_plugin"
],
"metadata": {
"id": "3UHTLgeEDez9",
"outputId": "e1bf70f5-5d84-4246-ec29-dc36318102e7",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "3UHTLgeEDez9",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"created output directory at /content/src\n",
"Out bin /content/result.out\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%cu\n",
"// Find the maximum element of an array with a two-level GPU reduction:\n",
"// each block reduces a contiguous 256-element chunk to one value, and\n",
"// the host loops until a single value remains.\n",
"#include <cstdio>\n",
"#include <iostream>\n",
"\n",
"using namespace std;\n",
"\n",
"// Kernel: block blockIdx.x reduces a[256*blockIdx.x .. min(256*blockIdx.x+256, n))\n",
"// into b[blockIdx.x]. Launched with one thread per block, so no\n",
"// intra-block synchronization is needed. Assumes all elements are\n",
"// non-negative (the running maximum starts at 0).\n",
"__global__ void maxi(int* a, int* b, int n)\n",
"{\n",
"\tint block = 256 * blockIdx.x;\n",
"\tint max = 0;\n",
"\n",
"\tfor (int i = block; i < min(256 + block, n); i++) {\n",
"\t\tif (max < a[i]) {\n",
"\t\t\tmax = a[i];\n",
"\t\t}\n",
"\t}\n",
"\tb[blockIdx.x] = max;\n",
"}\n",
"\n",
"int main()\n",
"{\n",
"\t// BUG FIX: the original used \"n = 3 >> 2\", which is 0 -- the VLA\n",
"\t// was empty, nothing was initialized, and the printed maximum was\n",
"\t// uninitialized device memory. \"3 << 2\" (= 12) was clearly intended.\n",
"\tint n = 3 << 2;\n",
"\tint a[n];\n",
"\n",
"\tfor (int i = 0; i < n; i++) {\n",
"\t\ta[i] = rand() % n;\n",
"\t\tcout << a[i] << \"\\t\";\n",
"\t}\n",
"\n",
"\tcudaEvent_t start, end;\n",
"\tint *ad, *bd;\n",
"\tint size = n * sizeof(int);\n",
"\tcudaMalloc(&ad, size);\n",
"\tcudaMemcpy(ad, a, size, cudaMemcpyHostToDevice);\n",
"\tint grids = (n + 255) / 256;\t// ceil(n / 256) in integer arithmetic\n",
"\tcudaMalloc(&bd, grids * sizeof(int));\n",
"\n",
"\tcudaEventCreate(&start);\n",
"\tcudaEventCreate(&end);\n",
"\tcudaEventRecord(start);\n",
"\n",
"\twhile (n > 1) {\n",
"\t\t// Recompute the launch width each pass so shrinking inputs do\n",
"\t\t// not launch stale block counts past the live data.\n",
"\t\tgrids = (n + 255) / 256;\n",
"\t\tmaxi<<<grids, 1>>>(ad, bd, n);\n",
"\t\tn = grids;\n",
"\t\tcudaMemcpy(ad, bd, n * sizeof(int), cudaMemcpyDeviceToDevice);\n",
"\t}\n",
"\n",
"\tcudaEventRecord(end);\n",
"\tcudaEventSynchronize(end);\n",
"\n",
"\tfloat time = 0;\n",
"\tcudaEventElapsedTime(&time, start, end);\n",
"\n",
"\t// BUG FIX: copy sizeof(int) into a scalar rather than a hard-coded\n",
"\t// 4 bytes into an oversized int[2].\n",
"\tint ans;\n",
"\tcudaMemcpy(&ans, ad, sizeof(int), cudaMemcpyDeviceToHost);\n",
"\n",
"\tcout << \"The maximum element is : \" << ans << endl;\n",
"\n",
"\tcout << \"The time required : \";\n",
"\tcout << time << endl;\n",
"\n",
"\t// Release GPU resources (leaked in the original).\n",
"\tcudaEventDestroy(start);\n",
"\tcudaEventDestroy(end);\n",
"\tcudaFree(ad);\n",
"\tcudaFree(bd);\n",
"\treturn 0;\n",
"}\n"
],
"metadata": {
"id": "raib6bNCDtVJ",
"outputId": "1d8a321b-26f0-467d-c1cc-9758336f114f",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "raib6bNCDtVJ",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The maximum element is : 61237264\n",
"The time required : 0.00256\n",
"\n"
]
}
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "title,-all",
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"colab": {
"provenance": [],
"include_colab_link": true
},
"language_info": {
"name": "python"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"gpuClass": "standard",
"accelerator": "GPU"
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment