Skip to content

Instantly share code, notes, and snippets.

b=128, s_q=1, mean_sk=4096, h_q=16, h_kv=1, d=576, dv=512, causal=True, varlen=False
0.164 ms, 112 TFLOPS, 3718 GB/s
b=128, s_q=1, mean_sk=4096, h_q=16, h_kv=1, d=576, dv=512, causal=True, varlen=True
0.175 ms, 105 TFLOPS, 3498 GB/s
b=128, s_q=2, mean_sk=4096, h_q=16, h_kv=1, d=576, dv=512, causal=True, varlen=False
0.168 ms, 218 TFLOPS, 3659 GB/s
b=128, s_q=2, mean_sk=4096, h_q=16, h_kv=1, d=576, dv=512, causal=True, varlen=True
0.184 ms, 205 TFLOPS, 3445 GB/s
b=128, s_q=1, mean_sk=4096, h_q=32, h_kv=1, d=576, dv=512, causal=True, varlen=False
0.166 ms, 220 TFLOPS, 3685 GB/s
This file has been truncated, but you can view the full file.
#0 building with "default" instance using docker driver
#1 [internal] load .dockerignore
#1 transferring context: 2B done
#1 ...
#2 [internal] load build definition from Dockerfile.cuda
#2 transferring dockerfile: 7.37kB done
#2 DONE 0.3s
2024-11-26 05:18:13,602 INFO prefiller:0-0:3853225:140670816544576 prefill_worker.py:886] Prefill worker step took more than 3 seconds, FIND ME YING 10.254240989685059
2024-11-26 05:18:43,639 INFO prefiller:0-0:3853225:140670816544576 prefill_worker.py:886] Prefill worker step took more than 3 seconds, FIND ME YING 9.305031538009644
2024-11-26 05:19:13,677 INFO prefiller:0-0:3853225:140670816544576 prefill_worker.py:886] Prefill worker step took more than 3 seconds, FIND ME YING 9.307690858840942
2024-11-26 05:19:43,698 INFO prefiller:0-0:3853225:140670816544576 prefill_worker.py:886] Prefill worker step took more than 3 seconds, FIND ME YING 9.345906019210815
2024-11-26 05:20:13,719 INFO prefiller:0-0:3853225:140670816544576 prefill_worker.py:886] Prefill worker step took more than 3 seconds, FIND ME YING 9.400884866714478
2024-11-26 05:20:43,751 INFO prefiller:0-0:3853225:140670816544576 prefill_worker.py:886] Prefill worker step took more than 3 seconds, FIND ME YING 9.450013399124146
2024-11-26 05:21:13,7
@YLGH
YLGH / hi
Created April 26, 2020 17:05
#include <vector>
#include <iostream>
#include <algorithm>
#include <map>
using namespace std;
int T;
int N;
int K;
class Solution:
def minNumberOfFrogs(self, croakOfFrogs: str) -> int:
index = {}
for x in 'croak':
index[x] = []
for i,c in enumerate(croakOfFrogs):
index[c].append(i)
def foo():
ans = []
for c_index in index['c']:
class Solution:
def wordBreak(self, S, wordDict):
words = set(wordDict)
memo = {}
def dp(index):
if index == len(S):
return [""]
if index in memo:
return memo[index]
from collections import deque
def minKBitFlips(A,K):
cardinality = 0
count = 0
flip = [0 for _ in range(len(A) + 1)]
for i in range(len(A)):
cardinality += flip[i]
if (A[i] + cardinality) % 2 == 1:
continue
else:
class Solution:
def minKBitFlips(self, A: List[int], K: int) -> int:
cardinality = 0
count = 0
flip = [0 for _ in range(len(A)+1)]
for i in range(len(A)):
cardinality += flip[i]
if (A[i] + cardinality) % 2 == 1:
continue
else:
import heapq
class Solution:
def maxPerformance(self, N, speed, efficiency, K):
workers = sorted(list(zip(speed, efficiency)) ,key=lambda x: x[1])
sm = 0
ans = 0
hp = []
for i in range(len(workers)-1,-1,-1):
worker = workers[i]
if len(hp) < K:
class Solution:
def shortestCommonSupersequence(self, str1: str, str2: str) -> str:
str1 = [None]+[x for x in str1]
str2 = [None]+[x for x in str2]
M = len(str1)
N = len(str2)
# len, last char, pointer to parent
dp = [[ (None,None,None) for _ in range(N)] for _ in range(M)]