/*
 * Skip to content
 *
 * Instantly share code, notes, and snippets.
 *
 * @jgarzik
 * Created August 14, 2014 13:19
 * Show Gist options
 *   • Star 0 You must be signed in to star a gist
 *   • Fork 0 You must be signed in to fork a gist
 *   • Save jgarzik/2e2c4373b88d90ee4859 to your computer and use it in GitHub Desktop.
 * Save jgarzik/2e2c4373b88d90ee4859 to your computer and use it in GitHub Desktop.
 *
 * storing data in registers
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/*
 * A 256-bit quantity held as 32 raw bytes.  The rest of the file moves
 * values of this type in and out of MMX/SSE registers.
 */
struct uchar256 {
	unsigned char data[32];		/* 256 bits / 8 bits per byte */
};

typedef struct uchar256 uint256_t;
/* Set by detect() from bit 23 of the EDX value returned by cpuid(1, ...). */
static bool f_mmx = false;
/* Set by detect() from bit 26 of the EDX value returned by cpuid(1, ...). */
static bool f_sse2 = false;
/* True when pointers are 8 bytes wide; detect() prints " 64b" or " 32b" from it. */
static const bool f_64bit = (sizeof(void *) == 8);
/*
 * Print a labelled hex dump of a byte buffer to stdout.
 *
 * name: label printed first, followed by ": ".
 * p_:   start of the buffer to dump.
 * len:  number of bytes to print.
 *
 * The output is a single line: every byte as two lowercase hex digits,
 * no separators, terminated by a newline.
 */
static void hexdump(const char *name, const void *p_, size_t len)
{
	const unsigned char *p = p_;

	printf("%s: ", name);
	/* Index with size_t: a narrower counter could wrap before reaching len. */
	for (size_t i = 0; i < len; i++)
		printf("%02x", p[i]);
	printf("\n");
}
/*
 * Fill a buffer with bytes read from /dev/urandom.
 *
 * p:   destination buffer.
 * len: number of bytes to fill (0 is allowed and succeeds trivially once
 *      the device has been opened).
 *
 * Returns true only when all len bytes were read.  Returns false when the
 * device cannot be opened, when read() fails with anything other than
 * EINTR, or when it reports end-of-file before the buffer is full.
 */
static bool rand_bytes(void *p, size_t len)
{
	unsigned char *out = p;
	size_t filled = 0;
	bool ok = true;

	int fd = open("/dev/urandom", O_RDONLY);
	if (fd < 0)
		return false;

	/* read() may return fewer bytes than requested; keep going until full. */
	while (filled < len) {
		ssize_t got = read(fd, out + filled, len - filled);

		if (got > 0) {
			filled += (size_t) got;
		} else if (got < 0 && errno == EINTR) {
			continue;	/* interrupted by a signal: retry */
		} else {
			ok = false;	/* hard error or unexpected EOF */
			break;
		}
	}

	close(fd);
	return ok;
}
/*
 * Copy a 256-bit value out of four MMX registers into *v.
 *
 * v:   destination; all 32 bytes of v->data are overwritten.
 * idx: register bank.  0 reads %mm0..%mm3; any other value reads
 *      %mm4..%mm7.
 *
 * Each register supplies eight consecutive bytes of v->data, in register
 * order.  The caller must only invoke this on a CPU that supports MMX.
 */
void read_u256_mmx(uint256_t *v, unsigned int idx)
{
	/*
	 * Describe each 8-byte slot as a byte-array struct instead of casting
	 * to uint64_t *: no alignment or aliasing assumptions are made about
	 * the caller's buffer, and the operand size matches what movq stores.
	 */
	struct mmx_lane { unsigned char b[8]; };
	struct mmx_lane *lane = (struct mmx_lane *) v->data;

	/* __asm__/__volatile__ stay available under strict -std=c99/-std=c11. */
	if (idx == 0) {
		__asm__ __volatile__("movq %%mm0,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movq %%mm1,%0" : "=m" (lane[1]));
		__asm__ __volatile__("movq %%mm2,%0" : "=m" (lane[2]));
		__asm__ __volatile__("movq %%mm3,%0" : "=m" (lane[3]));
	} else {
		__asm__ __volatile__("movq %%mm4,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movq %%mm5,%0" : "=m" (lane[1]));
		__asm__ __volatile__("movq %%mm6,%0" : "=m" (lane[2]));
		__asm__ __volatile__("movq %%mm7,%0" : "=m" (lane[3]));
	}
}
void write_u256_mmx(const uint256_t *v, unsigned int idx)
{
const uint64_t *vals = (const uint64_t *) v;
if (idx == 0) {
asm volatile("movq %0,%%mm0" : : "m" (vals[0]));
asm volatile("movq %0,%%mm1" : : "m" (vals[1]));
asm volatile("movq %0,%%mm2" : : "m" (vals[2]));
asm volatile("movq %0,%%mm3" : : "m" (vals[3]));
} else {
asm volatile("movq %0,%%mm4" : : "m" (vals[0]));
asm volatile("movq %0,%%mm5" : : "m" (vals[1]));
asm volatile("movq %0,%%mm6" : : "m" (vals[2]));
asm volatile("movq %0,%%mm7" : : "m" (vals[3]));
}
}
/*
 * Copy a 256-bit value out of a pair of XMM registers into *v_.
 *
 * v_:  destination; all 32 bytes of v_->data are overwritten when idx is
 *      valid.
 * idx: register pair.  0 -> %xmm0/%xmm1, 1 -> %xmm2/%xmm3,
 *      2 -> %xmm4/%xmm5, 3 -> %xmm6/%xmm7.  Any other value leaves *v_
 *      untouched.
 *
 * The lower-numbered register of the pair supplies bytes 0-15 and the
 * higher-numbered one bytes 16-31.  The caller must only invoke this on a
 * CPU that supports SSE2.
 */
void read_u256_sse(uint256_t *v_, unsigned int idx)
{
	/*
	 * Each store writes 16 bytes, so the memory operand must be a 16-byte
	 * object; an 8-byte operand would under-report what the asm modifies.
	 */
	struct sse_lane { unsigned char b[16]; };
	struct sse_lane *lane = (struct sse_lane *) v_->data;

	/*
	 * movdqu instead of movdqa: uint256_t carries no alignment guarantee,
	 * and movdqa faults on addresses that are not 16-byte aligned.
	 * __asm__/__volatile__ stay available under strict -std=c99/-std=c11.
	 */
	switch (idx) {
	case 0:
		__asm__ __volatile__("movdqu %%xmm0,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm1,%0" : "=m" (lane[1]));
		break;
	case 1:
		__asm__ __volatile__("movdqu %%xmm2,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm3,%0" : "=m" (lane[1]));
		break;
	case 2:
		__asm__ __volatile__("movdqu %%xmm4,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm5,%0" : "=m" (lane[1]));
		break;
	case 3:
		__asm__ __volatile__("movdqu %%xmm6,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm7,%0" : "=m" (lane[1]));
		break;
	default:
		/* Unknown pair: nothing is read. */
		break;
	}
}
void write_u256_sse(const uint256_t *v_, unsigned int idx)
{
const uint64_t *v = (const uint64_t *) v_;
switch (idx) {
case 0:
asm volatile("movdqa %0,%%xmm0" : : "m" (v[0]));
asm volatile("movdqa %0,%%xmm1" : : "m" (v[2]));
break;
case 1:
asm volatile("movdqa %0,%%xmm2" : : "m" (v[0]));
asm volatile("movdqa %0,%%xmm3" : : "m" (v[2]));
break;
case 2:
asm volatile("movdqa %0,%%xmm4" : : "m" (v[0]));
asm volatile("movdqa %0,%%xmm5" : : "m" (v[2]));
break;
case 3:
asm volatile("movdqa %0,%%xmm6" : : "m" (v[0]));
asm volatile("movdqa %0,%%xmm7" : : "m" (v[2]));
break;
}
}
/*
 * Round-trip random 256-bit values through both MMX register banks.
 *
 * For bank 0 and bank 1: fill v with random bytes, load it into the
 * registers, read it back into v2, hexdump both, and assert that they
 * match.  Exits with a failure status if random bytes cannot be obtained.
 */
static void runit_mmx(void)
{
	uint256_t v __attribute__ ((aligned(32)));
	uint256_t v2 __attribute__ ((aligned(32)));

	printf("MMX:\n");

	for (unsigned int i = 0; i < 2; i++) {
		char label[16];

		/* Without this check a failed read would leave v indeterminate. */
		if (!rand_bytes(&v, sizeof(v))) {
			fprintf(stderr, "rand_bytes: cannot read /dev/urandom\n");
			exit(EXIT_FAILURE);
		}
		memset(&v2, 0, sizeof(v2));

		write_u256_mmx(&v, i);
		read_u256_mmx(&v2, i);

		snprintf(label, sizeof(label), "%u.a", i);
		hexdump(label, &v, sizeof(v));
		snprintf(label, sizeof(label), "%u.b", i);
		hexdump(label, &v2, sizeof(v2));

		assert(!memcmp(&v, &v2, sizeof(v)));
	}
}
/*
 * Round-trip random 256-bit values through all four XMM register pairs.
 *
 * For pairs 0..3: fill v with random bytes, load it into the registers,
 * read it back into v2, hexdump both, and assert that they match.  Exits
 * with a failure status if random bytes cannot be obtained.
 */
static void runit_sse(void)
{
	uint256_t v __attribute__ ((aligned(32)));
	uint256_t v2 __attribute__ ((aligned(32)));

	printf("SSE:\n");

	for (unsigned int i = 0; i < 4; i++) {
		char label[16];

		/* Without this check a failed read would leave v indeterminate. */
		if (!rand_bytes(&v, sizeof(v))) {
			fprintf(stderr, "rand_bytes: cannot read /dev/urandom\n");
			exit(EXIT_FAILURE);
		}
		memset(&v2, 0, sizeof(v2));

		write_u256_sse(&v, i);
		read_u256_sse(&v2, i);

		snprintf(label, sizeof(label), "%u.a", i);
		hexdump(label, &v, sizeof(v));
		snprintf(label, sizeof(label), "%u.b", i);
		hexdump(label, &v2, sizeof(v2));

		assert(!memcmp(&v, &v2, sizeof(v)));
	}
}
/*
 * Execute the x86 CPUID instruction.
 *
 * code: value placed in EAX (the leaf to query); ECX is set to 0.
 * ebx, ecx, edx: receive the EBX, ECX and EDX results; must be non-NULL.
 *
 * The EAX result is not returned to the caller.
 */
static void cpuid(uint32_t code, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	uint32_t eax_out;

	/* __asm__/__volatile__ stay available under strict -std=c99/-std=c11. */
	__asm__ __volatile__("cpuid"
			     : "=a" (eax_out), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
			     : "a" (code), "c" (0));

	/* EAX must be listed as an output because CPUID overwrites it. */
	(void) eax_out;
}
/*
 * Query CPUID leaf 1, record the MMX and SSE2 feature flags in f_mmx and
 * f_sse2, and print a one-line summary of the detected features.
 */
static void detect(void)
{
	/* Bit positions tested in the EDX value returned for leaf 1. */
	enum { EDX_MMX_BIT = 23, EDX_SSE2_BIT = 26 };

	uint32_t regb = 0;
	uint32_t regc = 0;
	uint32_t regd = 0;

	cpuid(1, &regb, &regc, &regd);

	f_mmx = ((regd >> EDX_MMX_BIT) & 1u) != 0;
	f_sse2 = ((regd >> EDX_SSE2_BIT) & 1u) != 0;

	const char *mmx_tag = "";
	const char *sse2_tag = "";
	const char *width_tag = " 32b";

	if (f_mmx)
		mmx_tag = " MMX";
	if (f_sse2)
		sse2_tag = " SSE2";
	if (f_64bit)
		width_tag = " 64b";

	printf("CPU features:%s%s%s\n", mmx_tag, sse2_tag, width_tag);
}
/*
 * Entry point: detect CPU features, run the register round-trip test for
 * each supported instruction set, then report success.  Command-line
 * arguments are ignored.
 */
int main(int argc, char *argv[])
{
	(void) argc;
	(void) argv;

	detect();

	if (f_mmx) {
		runit_mmx();
	}
	if (f_sse2) {
		runit_sse();
	}

	printf("success!\n");
	return 0;
}
/* Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment */