Skip to content

Instantly share code, notes, and snippets.

View lemire's full-sized avatar
🚀
working hard and fast

Daniel Lemire lemire

🚀
working hard and fast
View GitHub Profile
@lemire
lemire / straceresults.txt
Created March 29, 2019 13:01
strace results with and without jemalloc
// with jemalloc
$ strace ./lemirebenchmark
execve("./lemirebenchmark", ["./lemirebenchmark"], [/* 28 vars */]) = 0
brk(NULL) = 0x79d0000
openat(AT_FDCWD, "/usr/local/lib/libjemalloc.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0\267\0\1\0\0\0\340b\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=4887824, ...}) = 0
mmap(NULL, 1113224, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xffffaa020000
mmap(0xffffaa090000, 131072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x60000) = 0xffffaa090000
@lemire
lemire / straceresults.txt
Created March 29, 2019 13:01
strace results with and without jemalloc
// with jemalloc
$ strace ./lemirebenchmark
execve("./lemirebenchmark", ["./lemirebenchmark"], [/* 28 vars */]) = 0
brk(NULL) = 0x79d0000
openat(AT_FDCWD, "/usr/local/lib/libjemalloc.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0\267\0\1\0\0\0\340b\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=4887824, ...}) = 0
mmap(NULL, 1113224, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xffffaa020000
mmap(0xffffaa090000, 131072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x60000) = 0xffffaa090000
@lemire
lemire / zigzag.c
Created May 7, 2019 20:58
zigzag encoding/decoding
#include <string.h>
#include <stdint.h>
/**
 * Zigzag-encode a signed 32-bit integer so that values of small magnitude
 * map to small unsigned values: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The doubling is performed on the unsigned representation. The previous
 * form, `val + val`, overflows a signed int (undefined behavior) whenever
 * val >= 2^30 or val < -2^30; unsigned arithmetic wraps modulo 2^32 and
 * yields the intended bit pattern for every input.
 *
 * @param val the signed value to encode
 * @return the zigzag-encoded value
 */
static inline
uint32_t _zigzag_encode_32 (int32_t val) {
  /* val >> 31 is expected to be an arithmetic shift, giving an all-ones
     mask for negative inputs and zero otherwise. */
  return ((uint32_t)val << 1) ^ (uint32_t)(val >> 31);
}
void zigzag_encode(const int32_t * in, uint32_t * out, size_t N) {
for(size_t i = 0; i < N; i++)
@lemire
lemire / allocator.h
Created May 9, 2019 19:00
prototype allocator
#ifndef SIMDJSON_ALLOCATOR_H
#define SIMDJSON_ALLOCATOR_H
#include "common_defs.h"
#include <cstddef>
#include <cstdint>
#include <limits>
template <class T> class padded_allocator {
public:
@lemire
lemire / fpvalgrind.c
Last active May 19, 2019 14:49
Compile this and run it through valgrind 3.13.0
// gcc -o t2 t2.c -mavx2
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <x86intrin.h>
__attribute__((no_sanitize("memory")))
__attribute__ ((noinline))
bool hash_zero_byte(char *va) {
__m256i v = _mm256_loadu_si256((__m256i *) va);
def f(x):
    """Return True when the byte value ``x`` is one of ``, : [ ] { }``.

    For ``x`` in 0..255 the result is True exactly for 44 (','), 58 (':'),
    91 ('['), 93 (']'), 123 ('{') and 125 ('}'), and False otherwise.

    The check works in two halves that must agree:
    * ``x | 32`` folds '[' onto '{' and ']' onto '}' (it leaves ',' and ':'
      unchanged because their bit 5 is already set);
    * ``(x + 212) % 256`` selects, through its low nibble, the value that
      the folded byte is expected to equal.
    """
    # Expected folded value for each low nibble of the shifted byte.
    table = [44, 125, 0, 0, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123]
    shifted = (x + 212) % 256
    folded = x | 32
    # A shifted byte with its high bit set never matches: the expected value
    # is forced to 0. NOTE(review): this looks like it mirrors a SIMD byte
    # shuffle that zeroes lanes whose index has the high bit set -- confirm.
    expected = 0 if (shifted & 0x80) == 0x80 else table[shifted & 0xF]
    return folded == expected
@lemire
lemire / attributepush_namespace.cpp
Created July 27, 2019 01:40
LLVM, attribute push and namespace... bad interactions?
// The following works fine under clang: the attribute pushed here,
// target("sse4.2,pclmul"), is applied to function declarations
// (apply_to=function) that appear before the matching `pop`, so the
// empty f() below receives it.
#pragma clang attribute push(__attribute__((target("sse4.2,pclmul"))), apply_to=function)
void f(){}
#pragma clang attribute pop
////////////
/// The following DOES NOT WORK: 'error: expected unqualified-id'
//////////
namespace joe {
// this is more or less a port of travis_avx2_w to plain C
namespace {
// Stand-in for __m256i for people who don't have AVX:
// eight 32-bit values, i.e. 256 bits of payload in total.
struct poor__m256i {
uint32_t val[8];
};
#ifndef really_inline
@lemire
lemire / gosetbenchmark_test.go
Created November 15, 2019 02:12
Memory usage in Go
package Main
import (
"math"
"reflect"
"testing"
"github.com/RoaringBitmap/roaring"
"github.com/willf/bitset"
)
// prefetching can be useful for large documents
// where page walking becomes a significant overhead
#ifdef _MSC_VER
// annoyingly, Visual Studio does not appear to have a portable
// prefetch
#ifdef IS_ARM64
// Thin wrapper that forwards the address p to the __prefetch intrinsic.
// This definition sits in the branch taken when both _MSC_VER and
// IS_ARM64 are defined.
inline void prefetch(const void *p) { __prefetch(p); }