Created
February 21, 2020 15:08
-
-
Save c200chromebook/c427828882ef1e89734003d8dddf56fc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import numba | |
from numba import cuda | |
import math | |
@cuda.jit(device=True)
def expensive(x):
    """Device function: return ``x`` raised to the power 1.001.

    Implemented with ``math.pow``; judging by its name this is the
    deliberately costly step of the demo pipeline.
    """
    exponent = 1.001
    return math.pow(x, exponent)
@cuda.jit(device=True)
def cheap(x):
    """Device function: return the square root of ``x`` via ``math.sqrt``."""
    root = math.sqrt(x)
    return root
@cuda.jit(device=True)
def calc(x):
    """Device function: compute ``cheap(expensive(x)) + 1``.

    Feeds ``x`` through ``expensive`` first, then ``cheap``, and adds one
    to the result.
    """
    return cheap(expensive(x)) + 1
@cuda.jit(device=True)
def threadid():
    """Device function: return the calling thread's global index along x.

    The index is the block's starting offset (block index times block
    width) plus the thread's position inside its block.
    """
    block_offset = cuda.blockIdx.x * cuda.blockDim.x
    return block_offset + cuda.threadIdx.x
# The signature is passed positionally: the old ``argtypes=`` keyword is
# deprecated and rejected by current Numba releases.
@cuda.jit(numba.void(numba.int32[:]), debug=True)
def kernel(arr):
    """Apply ``calc`` 1000 times, in place, to this thread's element of ``arr``.

    Each thread handles the single element whose index equals its global
    thread id (see ``threadid``). The value is written back to ``arr`` on
    every iteration, so it is converted to the array's element type each
    time round the loop.

    Args:
        arr: one-dimensional int32 device-accessible array, updated in place.
    """
    idx = threadid()

    # The launch grid is rounded up to a whole number of blocks, so the
    # last block can contain threads with no matching element. Those
    # threads must exit instead of indexing past the end of the array.
    if idx >= arr.shape[0]:
        return

    for _ in range(1000):
        arr[idx] = calc(arr[idx])
def main():
    """Run the demo: build 0..4999, launch ``kernel`` on it, print before/after.

    The array is created as int32 explicitly because ``kernel`` is compiled
    for an ``int32[:]`` argument; NumPy's platform-default integer type is
    64-bit on most systems and would not match that signature.
    """
    threads_per_block = 128

    print("SUP")
    z = np.arange(5000, dtype=np.int32)
    print(z)

    # Ceiling division so every element is covered by some thread
    # (40 blocks of 128 threads for 5000 elements).
    blocks = (z.size + threads_per_block - 1) // threads_per_block

    # Passing the host array directly lets Numba handle the transfer to the
    # device and the copy back, so ``z`` holds the results afterwards.
    kernel[blocks, threads_per_block](z)
    print(z)
# Run the demo only when this file is executed as a script, so importing it
# does not launch the kernel.
if __name__ == '__main__':
    main()
# Profile with: nvprof -a instruction_execution --export-profile Output python test.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment