This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Text.Printf | |
import Control.Exception | |
import System.CPUTime | |
-- time function implementation borrowed from | |
-- http://www.haskell.org/haskellwiki/Timing_computations | |
time :: IO t -> IO t | |
time a = do | |
start <- getCPUTime | |
v <- a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from elftools.elf.elffile import ELFFile | |
import struct | |
import sys | |
# From https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size | |
def sizeof_fmt(num, suffix='B'): | |
for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: | |
if abs(num) < 1024.0: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
def hflip(img: torch.Tensor) -> torch.Tensor: return img.flip(-1) | |
print(torch.jit.script(hflip)(torch.rand(3, 8, 8))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# as -o hello.o hello.S ; cc -o hello hello.o -nostdlib | |
.text | |
.globl _start | |
.type _start, @function | |
_start: | |
movl $1, %eax # sys_write( | |
movl $1, %edi # fd = stdout, | |
movl $.LC0, %esi # buf = LC0, | |
movl $12, %edx # 12); | |
syscall |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// gcc -c -Os -mavx512f -masm=intel | |
#include <immintrin.h> | |
float foo(float* con) { | |
__mmask16 msk = 0x00ff; | |
__m512 a = _mm512_maskz_loadu_ps(msk, con); | |
__m512 b = _mm512_set1_ps(con[1]); | |
__m512 c = _mm512_mul_ps(a,b); | |
return ((float *)&c)[0]; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// nvcc -o hello hello.cu; ./hello | |
#include <stdio.h> | |
__global__ void kernel() { | |
printf("Hello World of CUDA\n"); | |
} | |
int main() { | |
kernel<<<1,1>>>(); | |
return cudaDeviceSynchronize(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Inline Frame] torch_cuda.dll!std::_Default_allocator_traits<std::allocator<std::_Tree_node<unsigned int,void *>>>::deallocate(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 689 C++ | |
[Inline Frame] torch_cuda.dll!std::_Tree_node<unsigned int,void *>::_Freenode0(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 373 C++ | |
[Inline Frame] torch_cuda.dll!std::_Tree_val<std::_Tree_simple_types<unsigned int>>::_Erase_head(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 753 C++ | |
[Inline Frame] torch_cuda.dll!std::_Tree<std::_Tset_traits<unsigned int,std::less<unsigned int>,std::allocator<unsigned int>,0>>::{dtor}() Line 1191 C++ | |
> torch_cuda.dll!torch::jit::fuser::newForReduction(torch::jit::fuser::TensorView * tv, const std::vector<unsigned int,std::allocator<unsigned int>> & axes) Line 438 C++ | |
torch_cuda.dll!torch::jit::fuser::reductionOp(torch::jit::fuser::BinaryOpType reduction_op_type, const std::vector<int,std::allocator<int>> & axes, torch::jit::fuser::Val * init, to |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <arm_neon.h> | |
#include <math.h> | |
#include <stdio.h> | |
void run_neon_reciproc(float data_in[4], float data_out[4]) { | |
float32x4_t input = vld1q_f32(data_in); | |
float32x4_t out = vrecpeq_f32(input); | |
//out = vmulq_f32(vrecpsq_f32(input, out), out); | |
//out = vmulq_f32(vrecpsq_f32(input, out), out); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import boto3 | |
import os | |
import bz2 | |
import json | |
import subprocess | |
from datetime import datetime | |
def get_git_commit_history(path, branch="master"): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from datetime import datetime | |
from typing import Any, Dict, List, Optional, Union | |
from urllib.request import urlopen, Request | |
import json | |
import enum | |
import os | |
OlderNewer