Skip to content

Instantly share code, notes, and snippets.

@nkurz
nkurz / align.c
Created September 21, 2014 05:12
Sample file showing timings for several alignment implementations.
// gcc -fno-inline -std=gnu99 -Wall -O3 align.c -o align -lm -DLIKWID -llikwid -lpthread
// objdump -d align | less (to confirm that the code hasn't been optimized out)
// likwid -m -C2 -g BRANCH align
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef ALIGN
#define ALIGN 8
@nkurz
nkurz / constant_division.c
Created September 25, 2014 20:54
Test how well compilers optimize division by constants
// Code to test how well compilers optimize division by constants
// See: http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
// gcc -std=gnu11 -Wall -Wextra -O3 constant_division.c -o constant_division
// objdump -d constant_division | less
#ifndef COUNT
#define COUNT (1000 * 1000)
#endif
@nkurz
nkurz / constant_division.objdump
Created September 25, 2014 21:15
Objdump of different compilers for constant_division.c
GCC 4.8.1
4019f0: 66 0f 6f 00 movdqa (%rax),%xmm0
4019f4: 48 83 c0 10 add $0x10,%rax
4019f8: 48 39 c5 cmp %rax,%rbp
4019fb: 66 0f 6f c8 movdqa %xmm0,%xmm1
4019ff: 66 0f 6f e0 movdqa %xmm0,%xmm4
401a03: 66 0f 62 c8 punpckldq %xmm0,%xmm1
401a07: 66 0f 6a e0 punpckhdq %xmm0,%xmm4
401a0b: 66 0f f4 cb pmuludq %xmm3,%xmm1
401a0f: 66 0f f4 e3 pmuludq %xmm3,%xmm4
@nkurz
nkurz / same-function.c
Created October 8, 2014 21:27
Identical loops that execute in different but consistent times
// gcc -std=gnu99 -O3 -Wall -Wextra same-function.c -o same-function
// Identical loops that execute in different but consistent times
#if COPY_AND_RUN_TO_TEST
for n in 0 1 2 3 4 5 6 7 8 9;
do echo same-function ${n}:;
/usr/bin/time -f "%e seconds" same-function ${n};
/usr/bin/time -f "%e seconds" same-function ${n};
/usr/bin/time -f "%e seconds" same-function ${n};
done
@nkurz
nkurz / sub.asm
Last active August 29, 2015 14:07
Switching from SUB to SBB changes runtime by 15%. Can you explain why?
; Minimal example, see also http://stackoverflow.com/q/26266953/3766665
; To build (Linux):
; nasm -felf64 func.asm
; ld func.o
; Then run:
; perf stat -r10 ./a.out
; On Haswell and Sandy Bridge, observed runtime varies
; ~15% depending on whether sub or sbb is used in the loop
section .text
global _start
@nkurz
nkurz / broadcast.c
Created October 25, 2014 01:13
Illustrate the performance difference between vector broadcast from memory vs load and shuffle
// cc -fno-inline -g -march=native -std=gnu99 -O3 -Wall -Wextra broadcast.c -o broadcast
// works with 'gcc 4.8.2' and 'icc 14.03', but crashes with 'clang 3.4' because of alignment
// usage: broadcast [-r repeat] [-s size]
#ifdef LIKWID
#include <likwid.h>
#else
#define likwid_markerInit()
#define likwid_markerThreadInit()
#define likwid_markerStartRegion(name)
// Calculate cycles spent on overhead of function calls
// See http://cs.coloradocollege.edu/~bylvisaker/CallReturn/
// gcc -g -std=gnu99 -O3 -Wall -Wextra call-return.c -o call-return
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define DEFAULT_LOOP_COUNT (1000 * 1000)
@nkurz
nkurz / c.c
Created December 22, 2014 23:30
// C implementation for Pathfinding Benchmark by nate@verse.com
// See https://github.com/logicchains/LPATHBench for details
// Summary of benchmarks (see bottom for full numbers)
// 8981 LANGUAGE C 623
// 8981 LANGUAGE C++/clang 734
// 8981 LANGUAGE C++/gcc 755
// Best results compiling with GCC 4.7 or 4.8 -O2
// clang, icc and GCC 4.9 slightly worse with -O1, -O2, -O3, -Ofast
// -O3 and -Ofast much worse for all GCC. -O1 mixed but worse.
// gcc -march=native -g -std=gnu99 -Wall -Wextra -O3 symmetric.c -o symmetric -DUSE_ALG
// (where USE_ALG is one of USE_NATE, USE_KARIM, USE_BASIC, or USE_CONDITIONAL)
// Or if using https://code.google.com/p/likwid/ with -m markers:
// gcc -march=native -g -std=gnu99 -Wall -Wextra -O3 symmetric.c -o symmetric -DLIKWID -llikwid -lpthread -lm -DUSE_ALG
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
--- R-3.2.1/src/main/memory.c.orig 2015-07-31 23:15:07.017151621 -0700
+++ R-3.2.1/src/main/memory.c 2015-07-31 23:17:10.185150073 -0700
@@ -3724,11 +3724,21 @@
static FILE *R_MemReportingOutfile;
static R_size_t R_MemReportingThreshold;
+static void printLineNum(FILE *file, SEXP srcref) {
+ if (srcref && !isNull(srcref)) {
+ int line = asInteger(srcref);
+ fprintf(file, "#%d ", line);