Skip to content

Instantly share code, notes, and snippets.

@tysun
Created January 12, 2023 04:17
Show Gist options
  • Save tysun/651cac6bab77f6eb3d842ad3ad5e983b to your computer and use it in GitHub Desktop.
Save tysun/651cac6bab77f6eb3d842ad3ad5e983b to your computer and use it in GitHub Desktop.
colab-cuda-cmds.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/tysun/651cac6bab77f6eb3d842ad3ad5e983b/colab-cuda-cmds.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6085f4cb",
"metadata": {
"id": "6085f4cb",
"outputId": "2787f221-a359-4f5b-80b9-1e5021694d9e",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2021 NVIDIA Corporation\n",
"Built on Sun_Feb_14_21:12:58_PST_2021\n",
"Cuda compilation tools, release 11.2, V11.2.152\n",
"Build cuda_11.2.r11.2/compiler.29618528_0\n"
]
}
],
"source": [
"!nvcc --version"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25d51772",
"metadata": {
"id": "25d51772",
"outputId": "798abb2e-e22e-428e-e7f0-db31ae4ba304",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git\n",
" Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-u8jijzz9\n",
" Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-u8jijzz9\n",
" Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit aac710a35f52bb78ab34d2e52517237941399eff\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Building wheels for collected packages: NVCCPlugin\n",
" Building wheel for NVCCPlugin (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4304 sha256=208fd38f45a2dc6b7326aacb332d71455deafedb9bed2b117df5552d8aa29c0c\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-k6lpwjy2/wheels/f3/08/cc/e2b5b0e1c92df07dbb50a6f024a68ce090f5e7b2316b41756d\n",
"Successfully built NVCCPlugin\n",
"Installing collected packages: NVCCPlugin\n",
"Successfully installed NVCCPlugin-0.0.2\n"
]
}
],
"source": [
"!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git"
]
},
{
"cell_type": "code",
"source": [
"%load_ext nvcc_plugin"
],
"metadata": {
"id": "3UHTLgeEDez9",
"outputId": "e1bf70f5-5d84-4246-ec29-dc36318102e7",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "3UHTLgeEDez9",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"created output directory at /content/src\n",
"Out bin /content/result.out\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%cu\n",
"// Find the maximum element of an array with a two-level GPU reduction:\n",
"// each block reduces a contiguous 256-element chunk to one value, and\n",
"// the host loops until a single value remains.\n",
"#include <cstdio>\n",
"#include <iostream>\n",
"\n",
"using namespace std;\n",
"\n",
"// Kernel: block blockIdx.x reduces a[256*blockIdx.x .. min(256*blockIdx.x+256, n))\n",
"// into b[blockIdx.x]. Launched with one thread per block, so no\n",
"// intra-block synchronization is needed. Assumes all elements are\n",
"// non-negative (the running maximum starts at 0).\n",
"__global__ void maxi(int* a, int* b, int n)\n",
"{\n",
"\tint block = 256 * blockIdx.x;\n",
"\tint max = 0;\n",
"\n",
"\tfor (int i = block; i < min(256 + block, n); i++) {\n",
"\t\tif (max < a[i]) {\n",
"\t\t\tmax = a[i];\n",
"\t\t}\n",
"\t}\n",
"\tb[blockIdx.x] = max;\n",
"}\n",
"\n",
"int main()\n",
"{\n",
"\t// BUG FIX: the original used \"n = 3 >> 2\", which is 0 -- the VLA\n",
"\t// was empty, nothing was initialized, and the printed maximum was\n",
"\t// uninitialized device memory. \"3 << 2\" (= 12) was clearly intended.\n",
"\tint n = 3 << 2;\n",
"\tint a[n];\n",
"\n",
"\tfor (int i = 0; i < n; i++) {\n",
"\t\ta[i] = rand() % n;\n",
"\t\tcout << a[i] << \"\\t\";\n",
"\t}\n",
"\n",
"\tcudaEvent_t start, end;\n",
"\tint *ad, *bd;\n",
"\tint size = n * sizeof(int);\n",
"\tcudaMalloc(&ad, size);\n",
"\tcudaMemcpy(ad, a, size, cudaMemcpyHostToDevice);\n",
"\tint grids = (n + 255) / 256;\t// ceil(n / 256) in integer arithmetic\n",
"\tcudaMalloc(&bd, grids * sizeof(int));\n",
"\n",
"\tcudaEventCreate(&start);\n",
"\tcudaEventCreate(&end);\n",
"\tcudaEventRecord(start);\n",
"\n",
"\twhile (n > 1) {\n",
"\t\t// Recompute the launch width each pass so shrinking inputs do\n",
"\t\t// not launch stale block counts past the live data.\n",
"\t\tgrids = (n + 255) / 256;\n",
"\t\tmaxi<<<grids, 1>>>(ad, bd, n);\n",
"\t\tn = grids;\n",
"\t\tcudaMemcpy(ad, bd, n * sizeof(int), cudaMemcpyDeviceToDevice);\n",
"\t}\n",
"\n",
"\tcudaEventRecord(end);\n",
"\tcudaEventSynchronize(end);\n",
"\n",
"\tfloat time = 0;\n",
"\tcudaEventElapsedTime(&time, start, end);\n",
"\n",
"\t// BUG FIX: copy sizeof(int) into a scalar rather than a hard-coded\n",
"\t// 4 bytes into an oversized int[2].\n",
"\tint ans;\n",
"\tcudaMemcpy(&ans, ad, sizeof(int), cudaMemcpyDeviceToHost);\n",
"\n",
"\tcout << \"The maximum element is : \" << ans << endl;\n",
"\n",
"\tcout << \"The time required : \";\n",
"\tcout << time << endl;\n",
"\n",
"\t// Release GPU resources (leaked in the original).\n",
"\tcudaEventDestroy(start);\n",
"\tcudaEventDestroy(end);\n",
"\tcudaFree(ad);\n",
"\tcudaFree(bd);\n",
"\treturn 0;\n",
"}\n"
],
"metadata": {
"id": "raib6bNCDtVJ",
"outputId": "1d8a321b-26f0-467d-c1cc-9758336f114f",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "raib6bNCDtVJ",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The maximum element is : 61237264\n",
"The time required : 0.00256\n",
"\n"
]
}
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "title,-all",
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"colab": {
"provenance": [],
"include_colab_link": true
},
"language_info": {
"name": "python"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"gpuClass": "standard",
"accelerator": "GPU"
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment