This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.utils.flop_counter import FlopCounterMode | |
from triton.testing import do_bench | |
torch.set_default_device('cuda') | |
def get_flops_achieved(f): | |
flop_counter = FlopCounterMode(display=False) | |
with flop_counter: | |
f() | |
total_flops = flop_counter.get_total_flops() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.utils.flop_counter import FlopCounterMode | |
from triton.testing import do_bench | |
def get_flops_achieved(f): | |
flop_counter = FlopCounterMode(display=False) | |
with flop_counter: | |
f() | |
total_flops = flop_counter.get_total_flops() | |
ms_per_iter = do_bench(f) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from triton.testing import do_bench | |
# Create all tensors below on the CUDA device by default.
torch.set_default_device('cuda')

# Time a bf16 matrix multiply for three shapes. Each shape sets exactly one of
# M, K, N to 2047 instead of 2048, so the cost of an odd-sized dimension can be
# compared axis by axis.
for M, K, N in [(2047, 2048, 2048), (2048, 2047, 2048), (2048, 2048, 2047)]:
    A = torch.randn(M, K, dtype=torch.bfloat16)
    B = torch.randn(K, N, dtype=torch.bfloat16)
    print(f"M={M}, K={K}, N={N}")
    # The lambda is consumed by do_bench within this iteration, so closing over
    # the loop variables A and B is safe. The printed value is do_bench's
    # timing result (presumably milliseconds -- confirm against the triton docs).
    print(do_bench(lambda: torch.mm(A, B)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch._inductor.config | |
import time | |
torch._inductor.config.triton.cudagraphs = False | |
torch.set_float32_matmul_precision('high') | |
def bench(f, name=None, iters=100, warmup=5, display=True, profile=False): | |
for _ in range(warmup): | |
f() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.utils.parametrize as parametrize | |
from torch.utils._pytree import tree_map | |
class LoraTensor(object): | |
def __init__(self, weights, A, B): | |
self.weights = weights | |
self.A = A | |
self.B = B |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <int MAXV, class T = int> struct Dinic { | |
const static bool SCALING = false; // non-scaling = V^2E, Scaling=VElog(U) with higher constant | |
int lim = 1; | |
const T INF = numeric_limits<T>::max(); | |
struct edge { | |
int to, rev; | |
T cap, flow; | |
}; | |
int s = MAXV - 2, t = MAXV - 1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <int maxn> struct FFT { | |
constexpr static int lg2(int n) { return 32 - __builtin_clz(n - 1); } | |
const static int MAXN = 1 << lg2(maxn); | |
typedef complex<double> cpx; | |
int rev[MAXN]; | |
cpx rt[MAXN]; | |
FFT() { | |
rt[1] = cpx{1, 0}; | |
for (int k = 2; k < MAXN; k *= 2) { | |
cpx z[] = {1, polar(1.0, M_PI / k)}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ext/pb_ds/assoc_container.hpp> | |
using namespace __gnu_pbds; | |
struct chash { | |
const int RANDOM = (long long)(make_unique<char>().get()) ^ chrono::high_resolution_clock::now().time_since_epoch().count(); | |
static unsigned long long hash_f(unsigned long long x) { | |
x += 0x9e3779b97f4a7c15; | |
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9; | |
x = (x ^ (x >> 27)) * 0x94d049bb133111eb; | |
return x ^ (x >> 31); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int mobius[MAXPR]; | |
bool sieve[MAXPR]; | |
vector<int> primes; | |
void calcMobius() { | |
mobius[1] = 1; | |
for (int i = 2; i < MAXPR; i++) { | |
if (!sieve[i]) { | |
primes.push_back(i); | |
mobius[i] = -1; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct AhoCorasick { | |
struct Vertex { | |
int next[MAXCHAR], go[MAXCHAR]; | |
int leaf = -1; | |
int p = -1; | |
char pch; | |
int link = -1, leaflink = -1; | |
Vertex(int p = -1, char ch = '$') : p(p), pch(ch) { | |
fill(begin(next), end(next), -1); |
NewerOlder