Skip to content

Instantly share code, notes, and snippets.

$ cat test_adds_epu16.c
#include <emmintrin.h>
/*
 * Thin wrapper around the SSE2 _mm_adds_epu16 intrinsic: adds the packed
 * unsigned 16-bit lanes of a and b with unsigned saturation and returns
 * the resulting vector.
 */
__m128i x(__m128i a, __m128i b)
{
    const __m128i saturated_sum = _mm_adds_epu16(a, b);

    return saturated_sum;
}
$ gcc -O3 -c test_adds_epu16.c
$ objdump -d test_adds_epu16.o
#include <stdint.h>
#include <assert.h>
#include <emmintrin.h>
/*
 * Places a in the low 32 bits of an SSE2 vector, packs the vector's
 * signed 16-bit lanes down to unsigned 8-bit with saturation
 * (_mm_packus_epi16), and returns the low 32 bits of the packed result.
 */
uint32_t foo(uint32_t a)
{
    const __m128i lanes = _mm_cvtsi32_si128(a);
    const __m128i packed = _mm_packus_epi16(lanes, lanes);

    return _mm_cvtsi128_si32(packed);
}
Some oprofile log snippets for https://plus.google.com/u/0/100242854243155306943/posts/KWcGxqFemfV
Run on a 1 GHz ARM Cortex-A8; videotestsrc was used instead of v4l2src (no camera available here)
----------------------------------------------------------------------------
# opcontrol --deinit
# opcontrol --separate=kernel --no-vmlinux
# opcontrol --init
# opcontrol --reset
@ssvb
ssvb / its_script
Created November 13, 2012 18:39
mkimage script for creating vmlinuz file from zImage for ARM Chromebook
/dts-v1/;
/ {
description = "Chrome OS kernel image with one or more FDT blobs";
#address-cells = <1>;
images {
kernel@1 {
data = /incbin/("arch/arm/boot/zImage");
type = "kernel_noload";
@ssvb
ssvb / gist:4117583
Created November 20, 2012 12:11
DirectFB benchmark on Mele A2000 (Allwinner A10)
Graphics acceleration benchmark on Allwinner A10 (1GHz, 2x16-bit DDR3 @480MHz)
DirectFB 1.4.11 patched with https://github.com/allwinner-ics/lichee_buildroot/blob/master/package/directfb/directfb-1.4.11-vmware.patch
32bpp color depth, 640x480 screen resolution
----------------------------------------------------------------------------------------------------------------------------------------
./df_dok --noaccel --system ./df_dok (G2D acceleration) pixman-0.28.0
----------------------------------------------------------------------------------------------------------------------------------------
Anti-aliased Text ( 271.765 KChars/sec) [100.0%] ( 139.840 KChars/sec) [100.0%]
Anti-aliased Text (blend) ( 49.202 KChars/sec) [100.0%] ( 33.455 KChars/sec) [100.0%]
Fill Rectangle ( 326.699 MPixel/sec) [100.3%] (* 200.442 MPixel/sec) [ 2.0%] 359.
$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc-4.7.2
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-pc-linux-gnu/4.7.2/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: /var/tmp/portage/sys-devel/gcc-4.7.2/work/gcc-4.7.2/configure --prefix=/usr --bindir=/usr/x86_64-pc-linux-gnu/gcc-bin/4.7.2 --includedir=/usr/lib/gcc/x86_64-pc-linux-gnu/4.7.2/include --datadir=/usr/share/gcc-data/x86_64-pc-linux-gnu/4.7.2 --mandir=/usr/share/gcc-data/x86_64-pc-linux-gnu/4.7.2/man --infodir=/usr/share/gcc-data/x86_64-pc-linux-gnu/4.7.2/info --with-gxx-include-dir=/usr/lib/gcc/x86_64-pc-linux-gnu/4.7.2/include/g++-v4 --host=x86_64-pc-linux-gnu --build=x86_64-pc-linux-gnu --disable-altivec --disable-fixed-point --without-ppl --without-cloog --enable-lto --enable-nls --without-included-gettext --with-system-zlib --enable-obsolete --disable-werror --enable-secureplt --enable-multilib --with-multilib-list=m32,m64 --enable-libmudflap --disable-libssp --enable-libgomp --with-python-dir=/share/gcc-data/x86_64-pc-linux-gnu/4.7.2/py
@ssvb
ssvb / gist:4426358
Created January 1, 2013 10:19
Profiling RetroArch (sdl)
Tasks: 61 total, 2 running, 59 sleeping, 0 stopped, 0 zombie
Cpu(s): 98.7%us, 1.3%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Mem: 315004k total, 115304k used, 199700k free, 24k buffers
Swap: 0k total, 0k used, 0k free, 67692k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1417 root 20 0 59264 25m 6956 S 86.6 8.2 2:42.63 retroarch
870 root 20 0 41208 15m 7444 R 12.5 5.0 0:29.89 X
1037 root 20 0 8968 3128 2484 S 0.3 1.0 0:00.24 sshd
1481 root 20 0 2504 1152 920 R 0.3 0.4 0:00.05 top
@ssvb
ssvb / gist:4426402
Created January 1, 2013 10:29
RetroArch with gl
# Overhead Command Shared Object Symbol
# ........ ......... .................. ...............................
#
38.75% retroarch libGL.so.1.5.08004 [.] linear_texel_locations
22.67% retroarch libGL.so.1.5.08004 [.] _swrast_texture_span
13.30% retroarch libGL.so.1.5.08004 [.] lerp_rgba_2d
7.53% retroarch libGL.so.1.5.08004 [.] shade_texture_span
6.75% retroarch libGL.so.1.5.08004 [.] fetch_texel_2d_f_rgba8888
5.96% retroarch libGL.so.1.5.08004 [.] sample_2d_linear.isra.27
0.85% retroarch libGL.so.1.5.08004 [.] pack_row_ubyte_ARGB8888
@ssvb
ssvb / gist:4462216
Created January 5, 2013 16:04
xorg.conf for xf86-video-fbdev
sun4i ~ # cat /etc/X11/xorg.conf
Section "Device"
Identifier "FBDEV"
Driver "fbdev"
Option "fbdev" "/dev/fb0"
# Option "ShadowFB" "false"
EndSection
Section "ServerFlags"
Option "BlankTime" "0"
[product]
version = "1.0"
machine = "A10-EVB-V1.1"
[target]
boot_clock = 1008
dcdc2_vol = 1400
dcdc3_vol = 1250
ldo2_vol = 3000
ldo3_vol = 2800