Skip to content

Instantly share code, notes, and snippets.

View primenumber's full-sized avatar
:octocat:
poyo

prime number primenumber

:octocat:
poyo
View GitHub Profile
@primenumber
primenumber / bsr.cpp
Last active June 14, 2023 18:37
Parallel bit scan reverse
// author: prime (prime@kmc.gr.jp)
// License: MIT License
#include <iostream>
#include <vector>
#include <bitset>
#include <x86intrin.h>
#include <boost/timer/timer.hpp>
// Forward declarations; the definitions are not visible in this excerpt.
// Both take and return a 256-bit integer vector. Presumably these are two
// alternative implementations of a per-8-bit-lane bit scan reverse (a naive
// one and one based on float conversion) — TODO confirm against the definitions.
inline __m256i bsr_256_8_naive(__m256i x);
inline __m256i bsr_256_8_cvtfloat(__m256i x);
@primenumber
primenumber / clpeak
Last active November 30, 2022 14:35
Intel Arc A770 benchmark result
Platform: Intel(R) OpenCL HD Graphics
Device: Intel(R) Graphics [0x56a0]
Driver version : 22.32.23937 (Linux x64)
Compute units : 512
Clock frequency : 2400 MHz
Global memory bandwidth (GBPS)
float : 396.17
float2 : 405.79
@primenumber
primenumber / Bfi.bf
Created August 4, 2013 14:58
Brainf*ckで書かれたBrainf*ckインタプリタです. 標準入力から\0に到達するまでをBrainf*ckプログラムとみなし,到達した後を標準入力とみなします. このインタプリタを噛ませることによりだいたい数百×プログラム長倍遅くなります.
>>>>,[>+>>,]<<[<<<]>>>-<[[>>+>>>+<<<<<-]>>[<<+>>-]++++++[<<------->>-]+<<-[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[>>[>>>]>>>[>>>]<+<<[<<<]<<<[<<<]>-]<<[>>+>>>+<<<<<-]>>[<<+>>-]++++++[<<------->>-]+<<--[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[>>[>>>]>>>[>>>]<,<<[<<<]<<<[<<<]>-]<<[>>+>>>+<<<<<-]>>[<<+>>-]+++++[<<--------->>-]+<<[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[>>[>>>]>>>[>>>]<-<<[<<<]<<<[<<<]>-]<<[>>+>>>+<<<<<-]>>[<<+>>-]+++++[<<--------->>-]+<<-[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[>>[>>>]>>>[>>>]<.<<[<<<]<<<[<<<]>-]<<[>>+>>>+<<<<<-]>>[<<+>>-]++++++[<<---------->>-]+<<[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[>>[>>>]>>>[>>>]+<<<-<<<[<<<]<<<[<<<]>-]<<[>>+>>>+<<<<<-]>>[<<+>>-]++++++++[<<-------->>-]+<<++[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[>>[>>>]>>>[>>>]+>>>[-]<<<[<<<]<<<[<<<]>-]<<[>>+>>>+<<<<<-]>>[<<+>>-]+++++++[<<------------->>-]+<<[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[->>[>>>]>>>[>>>]>+<<[[<+>-]>>-<<]<[>+<-]>>>[-<<<<[<<<]<<<[<<<]>+[<+>>>-<[>>+>>>+<<<<<-]>>[<<+>>-]+++++++[<<------------->>-]+<<[[-]>>-<<]>>>>>[<<<<<+>>>>>-]<<<[<<<+>>>-]<<[>>+>>>+
>>>>++++>+>+>+++<<<
[
>[
<-[
[>>>>+>>>>+>>>>+<<<<<<<<<<<<-]
>[>>>>+>>>>+>>>>+<<<<<<<<<<<<-]
>[>>>>+>>>>+>>>>+<<<<<<<<<<<<-]
>[>>>>+>>>>+>>>>+<<<<<<<<<<<<-]
<++++++>>+>->>>>>>>
[<<<<<<<<<<<<+>>>>>>>>>>>>-]
#include <bits/stdc++.h>
#include <variant>
#include <atcoder/all>
// FOR(i,k,n): loop header that iterates i over [k, n) in steps of one. The
// counter has type `ll`, which is not defined in this excerpt (presumably an
// alias for long long — confirm where it is declared).
#define FOR(i,k,n) for(ll i=(k);i<(ll)(n);++i)
// REP(i,n): shorthand for FOR starting at 0, i.e. i iterates over [0, n).
#define REP(i,n) FOR(i,0,n)
// ALL(x): expands to the two arguments begin(x),end(x) for iterator-pair calls.
#define ALL(x) begin(x),end(x)
using namespace std;
using namespace std::string_literals;
using namespace atcoder;
12345678
8|________
7|________
6|________
5|___o*___
4|___*o___
3|________
2|________
1|________
12345678| black:2, white:2

Keybase proof

I hereby claim:

  • I am primenumber on github.
  • I am primenumber (https://keybase.io/primenumber) on keybase.
  • I have a public key whose fingerprint is 3E1F C759 158C C59C 94B5 6293 9E6F 6717 E092 576B

To claim this, I am signing this object:

@primenumber
primenumber / othello_solver.cu
Last active August 13, 2019 18:31
CUDA Othello Solver
// CUDA Othello Solver
// License: GPL 3
#include <cstdio>
#include <cassert>
// Compile-time constants; the code that uses them is outside this excerpt.
// threadsPerBlock: presumably the CUDA block size — TODO confirm at the kernel launch.
constexpr int threadsPerBlock = 128;
// simdWidth: presumably the number of threads cooperating on one node — TODO confirm.
constexpr int simdWidth = 4;
// Derived value: threadsPerBlock / simdWidth = 128 / 4 = 32.
constexpr int nodesPerBlock = threadsPerBlock/simdWidth;
// MAX_DEPTH: presumably an upper bound on search depth — TODO confirm where it is used.
constexpr int MAX_DEPTH = 10;
def relu(x):
    """Return the larger of ``x`` and 0 (rectified linear unit)."""
    return max(x, 0)
def linear1(x, n):
    """Return the two-element list ``[x, 2.0 * x - 2 ** n]``."""
    return [x, 2.0 * x - 2 ** n]
def linear2(x):
    """Return ``x[0] + -1.0 * x[1]``, i.e. the first element minus the second.

    ``x`` must be indexable with at least two elements.
    """
    return x[0] + -1.0 * x[1]
def block(x, n):
#include <iostream>
#include <x86intrin.h>
#include <boost/timer/timer.hpp>
// Delta swap applied independently to each 64-bit lane of `a`.
// For every bit position selected by `mask`, the bit at that position is
// exchanged with the bit `delta` positions above it; other bits are unchanged.
inline __m128i mm_delta_swap_epi64(__m128i a, __m128i mask, int delta) {
  // Bring each upper partner bit down onto its lower partner's position.
  const __m128i lowered = _mm_srli_epi64(a, delta);
  // Set exactly where a selected pair of bits differs (at the lower position).
  const __m128i differing = _mm_and_si128(_mm_xor_si128(a, lowered), mask);
  // The same difference pattern, moved back up to the upper positions.
  const __m128i differing_up = _mm_slli_epi64(differing, delta);
  // Flipping both members of every differing pair swaps them.
  const __m128i low_flipped = _mm_xor_si128(a, differing);
  return _mm_xor_si128(low_flipped, differing_up);
}
inline __m128i mm_unpacklo_epb_unpack_dswap(__m128i a, __m128i b) {