Created
January 12, 2023 04:17
-
-
Save tysun/651cac6bab77f6eb3d842ad3ad5e983b to your computer and use it in GitHub Desktop.
colab-cuda-cmds.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/tysun/651cac6bab77f6eb3d842ad3ad5e983b/colab-cuda-cmds.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "6085f4cb", | |
"metadata": { | |
"id": "6085f4cb", | |
"outputId": "2787f221-a359-4f5b-80b9-1e5021694d9e", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"nvcc: NVIDIA (R) Cuda compiler driver\n", | |
"Copyright (c) 2005-2021 NVIDIA Corporation\n", | |
"Built on Sun_Feb_14_21:12:58_PST_2021\n", | |
"Cuda compilation tools, release 11.2, V11.2.152\n", | |
"Build cuda_11.2.r11.2/compiler.29618528_0\n" | |
] | |
} | |
], | |
"source": [ | |
"!nvcc --version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "25d51772", | |
"metadata": { | |
"id": "25d51772", | |
"outputId": "798abb2e-e22e-428e-e7f0-db31ae4ba304", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git\n", | |
" Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-u8jijzz9\n", | |
" Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-u8jijzz9\n", | |
" Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit aac710a35f52bb78ab34d2e52517237941399eff\n", | |
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
"Building wheels for collected packages: NVCCPlugin\n", | |
" Building wheel for NVCCPlugin (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for NVCCPlugin: filename=NVCCPlugin-0.0.2-py3-none-any.whl size=4304 sha256=208fd38f45a2dc6b7326aacb332d71455deafedb9bed2b117df5552d8aa29c0c\n", | |
" Stored in directory: /tmp/pip-ephem-wheel-cache-k6lpwjy2/wheels/f3/08/cc/e2b5b0e1c92df07dbb50a6f024a68ce090f5e7b2316b41756d\n", | |
"Successfully built NVCCPlugin\n", | |
"Installing collected packages: NVCCPlugin\n", | |
"Successfully installed NVCCPlugin-0.0.2\n" | |
] | |
} | |
], | |
"source": [ | |
"!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%load_ext nvcc_plugin" | |
], | |
"metadata": { | |
"id": "3UHTLgeEDez9", | |
"outputId": "e1bf70f5-5d84-4246-ec29-dc36318102e7", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"id": "3UHTLgeEDez9", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"created output directory at /content/src\n", | |
"Out bin /content/result.out\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%%cu\n", | |
"#include <cstdio>\n", | |
"#include <cstdlib>\n", | |
"#include <iostream>\n", | |
"\n", | |
"\tusing namespace std;\n", | |
"\n", | |
"// Number of elements each maxi block scans; shared by the kernel and the host launch math.\n", | |
"const int kChunk = 256;\n", | |
"\n", | |
"// Block-wise maximum. Block k scans a[k * kChunk .. min((k + 1) * kChunk, n)) serially\n", | |
"// and writes the largest value it saw to b[k]. threadIdx is never consulted, so launch\n", | |
"// with one thread per block and at least ceil(n / kChunk) blocks; b needs one int for\n", | |
"// every block that has data.\n", | |
"__global__ void maxi(int* a, int* b, int n)\n", | |
"{\n", | |
"\tint first = kChunk * blockIdx.x;\n", | |
"\t// A block whose chunk starts past the end has no element to report.\n", | |
"\tif (first >= n) {\n", | |
"\t\treturn;\n", | |
"\t}\n", | |
"\tint last = min(first + kChunk, n);\n", | |
"\n", | |
"\t// Seed with a real element instead of 0 so all-negative input is handled.\n", | |
"\tint best = a[first];\n", | |
"\tfor (int i = first + 1; i < last; i++) {\n", | |
"\t\tif (best < a[i]) {\n", | |
"\t\t\tbest = a[i];\n", | |
"\t\t}\n", | |
"\t}\n", | |
"\tb[blockIdx.x] = best;\n", | |
"}\n", | |
"\n", | |
"// Prints a diagnostic when a CUDA runtime call fails; returns true on success.\n", | |
"static bool ok(cudaError_t err, const char* what)\n", | |
"{\n", | |
"\tif (err == cudaSuccess) {\n", | |
"\t\treturn true;\n", | |
"\t}\n", | |
"\tcerr << what << \" failed: \" << cudaGetErrorString(err) << endl;\n", | |
"\treturn false;\n", | |
"}\n", | |
"\n", | |
"// Fills a small host array with pseudo-random values, reduces it to its maximum on\n", | |
"// the GPU by launching maxi until a single value remains, then prints that maximum\n", | |
"// and the elapsed time reported by CUDA events (milliseconds).\n", | |
"// Returns 0 on success and 1 if any CUDA call failed.\n", | |
"int main()\n", | |
"{\n", | |
"\t// 3 << 2 == 12. The previous 3 >> 2 evaluated to 0, so no kernel ever ran and\n", | |
"\t// the value printed as the maximum was uninitialised.\n", | |
"\tconst int count = 3 << 2;\n", | |
"\tint a[count];\n", | |
"\n", | |
"\tfor (int i = 0; i < count; i++) {\n", | |
"\t\ta[i] = rand() % count;\n", | |
"\t\tcout << a[i] << \"\\t\";\n", | |
"\t}\n", | |
"\t// End the row of inputs so the result starts on its own line.\n", | |
"\tcout << endl;\n", | |
"\n", | |
"\tint n = count;\n", | |
"\tint grids = (n + kChunk - 1) / kChunk;\n", | |
"\tint *ad = 0, *bd = 0;\n", | |
"\tcudaEvent_t start = 0, end = 0;\n", | |
"\tfloat time = 0;\n", | |
"\tint ans = 0;\n", | |
"\n", | |
"\t// Short-circuiting stops at the first failing call; ok() has already reported it.\n", | |
"\tbool good = ok(cudaMalloc(&ad, n * sizeof(int)), \"cudaMalloc(ad)\")\n", | |
"\t\t&& ok(cudaMalloc(&bd, grids * sizeof(int)), \"cudaMalloc(bd)\")\n", | |
"\t\t&& ok(cudaMemcpy(ad, a, n * sizeof(int), cudaMemcpyHostToDevice), \"cudaMemcpy(host to device)\")\n", | |
"\t\t&& ok(cudaEventCreate(&start), \"cudaEventCreate(start)\")\n", | |
"\t\t&& ok(cudaEventCreate(&end), \"cudaEventCreate(end)\")\n", | |
"\t\t&& ok(cudaEventRecord(start), \"cudaEventRecord(start)\");\n", | |
"\n", | |
"\t// Each pass turns n values into ceil(n / kChunk) partial maxima.\n", | |
"\twhile (good && n > 1) {\n", | |
"\t\tgrids = (n + kChunk - 1) / kChunk;\n", | |
"\t\tmaxi<<<grids, 1>>>(ad, bd, n);\n", | |
"\t\t// A launch reports configuration errors only through cudaGetLastError.\n", | |
"\t\tgood = ok(cudaGetLastError(), \"maxi launch\");\n", | |
"\n", | |
"\t\t// The output of this pass is the input of the next one: swap the two\n", | |
"\t\t// buffers instead of copying bd back into ad.\n", | |
"\t\tint* tmp = ad;\n", | |
"\t\tad = bd;\n", | |
"\t\tbd = tmp;\n", | |
"\t\tn = grids;\n", | |
"\t}\n", | |
"\n", | |
"\t// Synchronising on the end event also surfaces faults raised inside the kernel.\n", | |
"\tgood = good\n", | |
"\t\t&& ok(cudaEventRecord(end), \"cudaEventRecord(end)\")\n", | |
"\t\t&& ok(cudaEventSynchronize(end), \"cudaEventSynchronize(end)\")\n", | |
"\t\t&& ok(cudaEventElapsedTime(&time, start, end), \"cudaEventElapsedTime\")\n", | |
"\t\t&& ok(cudaMemcpy(&ans, ad, sizeof(int), cudaMemcpyDeviceToHost), \"cudaMemcpy(device to host)\");\n", | |
"\n", | |
"\t// Release device resources on both the success and the failure path.\n", | |
"\tcudaFree(ad);\n", | |
"\tcudaFree(bd);\n", | |
"\tif (start) {\n", | |
"\t\tcudaEventDestroy(start);\n", | |
"\t}\n", | |
"\tif (end) {\n", | |
"\t\tcudaEventDestroy(end);\n", | |
"\t}\n", | |
"\n", | |
"\tif (!good) {\n", | |
"\t\treturn 1;\n", | |
"\t}\n", | |
"\n", | |
"\tcout << \"The maximum element is : \" << ans << endl;\n", | |
"\n", | |
"\tcout << \"The time required : \";\n", | |
"\tcout << time << endl;\n", | |
"\treturn 0;\n", | |
"}\n" | |
], | |
"metadata": { | |
"id": "raib6bNCDtVJ", | |
"outputId": "1d8a321b-26f0-467d-c1cc-9758336f114f", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"id": "raib6bNCDtVJ", | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"The maximum element is : 61237264\n", | |
"The time required : 0.00256\n", | |
"\n" | |
] | |
} | |
] | |
} | |
], | |
"metadata": { | |
"jupytext": { | |
"cell_metadata_filter": "title,-all", | |
"main_language": "python", | |
"notebook_metadata_filter": "-all" | |
}, | |
"colab": { | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"gpuClass": "standard", | |
"accelerator": "GPU" | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment