Skip to content

Instantly share code, notes, and snippets.

View timshen91's full-sized avatar

Tim Shen timshen91

View GitHub Profile
@timshen91
timshen91 / gist:9ba7612f877839cdd346199e22efca6a
Last active March 3, 2022 19:26
Benchmarking Turing MMA instructions
#include <cuda.h>
#include <cuda_fp16.h>
__device__ inline void mma_fp16_acc_fp16(unsigned const *A, unsigned const *B,
unsigned const *C, unsigned *D) {
asm volatile(
"mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0,%1}, "
"{%2,%3}, {%4}, {%5,%6};\n"
: "=r"(D[0]), "=r"(D[1])
: "r"(A[0]), "r"(A[1]), "r"(B[0]), "r"(C[0]), "r"(C[1]));
set -g prefix C-h
set -g escape-time 0
set -g default-terminal "screen-256color"
bind-key -n M-t new-window -c '#{pane_current_path}' -a -t '{end}'
bind-key -n M-w kill-window
bind-key -n M-j next-window
bind-key -n M-k previous-window
bind-key -n M-J swap-window -d -t +1
bind-key -n M-K swap-window -d -t -1
bind-key -n M-f copy-mode
/*
clang++ --version
clang++ -DTEST_WITH_FUNC=SumAutoVec a.cc -std=c++11 -O2 && time ./a.out
clang++ -DTEST_WITH_FUNC=SumSad a.cc -std=c++11 -O2 && time ./a.out
clang++ -DTEST_WITH_FUNC=SumElementWiseInt16Acc a.cc -std=c++11 -O2 && time ./a.out
g++ --version
g++ -DTEST_WITH_FUNC=SumAutoVec a.cc -std=c++11 -O2 && time ./a.out
g++ -DTEST_WITH_FUNC=SumSad a.cc -std=c++11 -O2 && time ./a.out
g++ -DTEST_WITH_FUNC=SumElementWiseInt16Acc a.cc -std=c++11 -O2 && time ./a.out
@timshen91
timshen91 / sc2_repl.py
Created August 12, 2017 06:41
StarCraftII Repl
#!/usr/bin/python3
import websocket
from s2clientprotocol import sc2api_pb2
from google.protobuf import text_format
conn = websocket.create_connection("ws://127.0.0.1:9999/sc2api")
try:
while True:
@timshen91
timshen91 / a.cc
Last active May 12, 2017 21:03
atomic reordering
#include <atomic>
int a;
std::atomic<bool> a_meaningful { 0 }; // a is meaningful only if a_meaningful
int Read() {
if (a_meaningful.load(std::memory_order_acquire)) {
return a;
}
return -1;
a.cc:
void Bar();

// Performs an acquire-load of *a and invokes Bar() only when the loaded
// value is non-zero; a zero value makes this function a no-op.
void Foo(std::atomic<uint32_t> *a) {
  const uint32_t observed = a->load(std::memory_order_acquire);
  if (observed == 0) {
    return;
  }
  Bar();
}
@timshen91
timshen91 / view.cc
Last active February 6, 2017 21:12
#include <utility>
#include <vector>
// Wraps a range, holding it as exactly `UnderlyingRangeType`.
//
// When `UnderlyingRangeType` is an lvalue-reference type, `storage_` is a
// reference bound to the caller's object. When it is an object type, the
// by-value constructor argument is moved into `storage_`.
template <typename UnderlyingRangeType>
struct some_range_view {
  // The cast below is what std::forward<UnderlyingRangeType> expands to:
  // reference collapsing yields an lvalue for reference types and an rvalue
  // (i.e. a move) for object types.
  some_range_view(UnderlyingRangeType range)
      : storage_(static_cast<UnderlyingRangeType&&>(range)) {}

  UnderlyingRangeType storage_;
};
#include <type_traits>
#include <utility>
template<typename T>
class SharedPtr;
template<typename Base>
class RefCountMixin {
protected:
RefCountMixin() = default;
#include <stdio.h>
/* Node with two child pointers and an int payload. */
typedef struct Tree Tree;

struct Tree {
  Tree* left;  /* left child pointer */
  Tree* right; /* right child pointer */
  int value;   /* payload stored in this node */
};
void PostTraverse(Tree* node) {
if (node) {
// Empty tag type that carries an int as a compile-time template argument.
template <int Value>
struct IntT {};

// Streams the integer carried by an IntT tag into `sink` via `sink << Value`
// and returns the result of that expression as a `Stream&`.
template <typename Stream, int Value>
Stream& operator<<(Stream& sink, IntT<Value>) {
  return sink << Value;
}
template<int n, int m>
constexpr bool operator==(IntT<n>, IntT<m>) {