Skip to content

Instantly share code, notes, and snippets.

View jcelerier's full-sized avatar
🏳️‍🌈
bash the fash

Jean-Michaël Celerier jcelerier

🏳️‍🌈
bash the fash
View GitHub Profile
size_t memcount_sse2(const void *s, int c, size_t n) {
__m128i cv = _mm_set1_epi8(c), sum = _mm_setzero_si128(), acr0,acr1,acr2,acr3;
const char *p,*pe;
for(p = s; p != (char *)s+(n- (n % (252*16)));) {
for(acr0 = acr1 = acr2 = acr3 = _mm_setzero_si128(),pe = p+252*16; p != pe; p += 64) {
acr0 = _mm_add_epi8(acr0, _mm_cmpeq_epi8(cv, _mm_loadu_si128((const __m128i *)p)));
acr1 = _mm_add_epi8(acr1, _mm_cmpeq_epi8(cv, _mm_loadu_si128((const __m128i *)(p+16))));
acr2 = _mm_add_epi8(acr2, _mm_cmpeq_epi8(cv, _mm_loadu_si128((const __m128i *)(p+32))));
acr3 = _mm_add_epi8(acr3, _mm_cmpeq_epi8(cv, _mm_loadu_si128((const __m128i *)(p+48)))); __builtin_prefetch(p+1024);
}
#include <tuple>
#include <initializer_list>
#include <math.h>
#include <utility>
using namespace std;
template <typename Tup1, typename Tup2, size_t...s>
double euclidean_distance_impl (Tup1 const &tup1, Tup2 const &tup2, index_sequence<s...>)
@daniel-j-h
daniel-j-h / really_strong_typedef.cc
Last active December 21, 2015 23:58
REALLY_STRONG_TYPEDEF
#include <type_traits>
#include <iostream>
#define REALLY_STRONG_TYPEDEF(From, To) \
class To final { \
static_assert(std::is_scalar<From>(), ""); \
From x; \
\
public: \
To() = default; \
@Manu343726
Manu343726 / gist:ca0ceb224ea789415387
Created September 19, 2015 18:15
Running ARM docker image with QEMU on x86_64 Arch Linux host
# Install quemu, docker, etc
yaourt -S qemu qemu-user-static binfmt-support
# The quemu-user-static AUR package is outdated and broken. The .deb package they pull is no longer in the ubuntu repository.
# Edit the PKGBUILD and use qemu-user-static_2.4+dfsg-3_amd64.deb (With SHA1 sum "84d83a16c60c82b6c579f2f750b04a3ac26c249b")
# Enable ARM emulation
update-binfmts --enable qemu-arm
@r-lyeh-archived
r-lyeh-archived / vector_view.cpp
Created April 21, 2015 15:38
vector_view implementation based off the tech string_view spec
// vector_view class, based on code by James exjam https://github.com/exjam/string_view
// rlyeh, public domain
#pragma once
#include <vector>
#if defined(_MSC_VER) && _MSC_VER < 1900
#define constexpr
#define noexcept
#endif
@philipz
philipz / gist:a7adca91bba7089ba47b
Created February 17, 2015 08:50
Run RPi Raspbian on Docker

docker run -it --rm -v /usr/bin/qemu-arm-static:/usr/bin/qemu-arm-static philipz/rpi-raspbian bash

@nocnokneo
nocnokneo / CMakeLists.txt
Last active January 4, 2024 03:34
VTK Rendered to an FBO in a Qt Quick 2 Scene Graph
cmake_minimum_required(VERSION 2.8.11)
project(VtkFboInQtQuick)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_AUTOMOC ON)
find_package(VTK REQUIRED)
include(${VTK_USE_FILE})
@tfogo
tfogo / howtogif
Created April 8, 2014 13:19
How to convert videos to gif using ffmpeg and gifsicle
How to convert a video file to an animated GIF on Linux:
Install required software:
apt-get install ffmpeg gifsicle imagemagick:
Convert the video file to a series of small images:
mkdir /tmp/gif/
ffmpeg -i YOURVIDEOFILE.mp4 -r 10 /tmp/gif/out%04d.gif
Combine these images together into a GIF animation:
gifsicle –delay=10 –loop /tmp/gif/*.gif > animation.gif
Optimise the GIF animation so the file size is smaller:
@rygorous
rygorous / magic_ring.cpp
Created July 22, 2012 03:55
The magic ring buffer.
#define _CRT_SECURE_NO_DEPRECATE
#include <stdio.h>
#include <string.h>
#include <Windows.h>
// This allocates a "magic ring buffer" that is mapped twice, with the two
// copies being contiguous in (virtual) memory. The advantage of this is
// that this allows any function that expects data to be contiguous in
// memory to read from (or write to) such a buffer. It also means that
@hellerbarde
hellerbarde / latency.markdown
Created May 31, 2012 13:16 — forked from jboner/latency.txt
Latency numbers every programmer should know

Latency numbers every programmer should know

L1 cache reference ......................... 0.5 ns
Branch mispredict ............................ 5 ns
L2 cache reference ........................... 7 ns
Mutex lock/unlock ........................... 25 ns
Main memory reference ...................... 100 ns             
Compress 1K bytes with Zippy ............. 3,000 ns  =   3 µs
Send 2K bytes over 1 Gbps network ....... 20,000 ns  =  20 µs
SSD random read ........................ 150,000 ns  = 150 µs

Read 1 MB sequentially from memory ..... 250,000 ns = 250 µs