Skip to content

Instantly share code, notes, and snippets.

@samson-wang
Last active February 19, 2019 09:56
Show Gist options
  • Save samson-wang/39516d1e40caae19ea12a6b64bfbd598 to your computer and use it in GitHub Desktop.
Save samson-wang/39516d1e40caae19ea12a6b64bfbd598 to your computer and use it in GitHub Desktop.
Float multiplication test: timing multiplications with normal vs. very small (subnormal) float operands.
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738
real 0m0.057s
user 0m0.056s
sys 0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306921002e-39, HEX: 0x2be3ee
A: 3.7835058537e-44, HEX: 0x1b
A: 4.0263963917e-37, HEX: 0x30902dc
real 0m0.622s
user 0m0.620s
sys 0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738
real 0m0.025s
user 0m0.024s
sys 0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306921002e-39, HEX: 0x2be3ee
A: 3.7835058537e-44, HEX: 0x1b
A: 4.0263963917e-37, HEX: 0x30902dc
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738
real 0m0.015s
user 0m0.012s
sys 0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306921002e-39, HEX: 0x2be3ee
A: 3.7835058537e-44, HEX: 0x1b
A: 4.0263963917e-37, HEX: 0x30902dc
real 0m0.540s
user 0m0.540s
sys 0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738
real 0m0.018s
user 0m0.016s
sys 0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 0.0000000000e+00, HEX: 0x0
A: 0.0000000000e+00, HEX: 0x0
A: 0.0000000000e+00, HEX: 0x0
real 0m0.016s
user 0m0.012s
sys 0m0.004s
#!/bin/bash
# Build test_float_mul.c with gcc twice (no optimization flag, then -O1).
# For each build, time a run of 10000000 iterations with the regular operand
# and a run with the extra "small" argument, then dump the assembly.

# -f: do not complain when no previous binary exists (e.g. on the first run).
rm -f ./a.out
gcc test_float_mul.c
time ./a.out 10000000
time ./a.out 10000000 small
gcc test_float_mul.c -S -o test_float_mul_O0.s

rm -f ./a.out
gcc -O1 test_float_mul.c
time ./a.out 10000000
time ./a.out 10000000 small
# Pass -O1 here as well so the dumped assembly matches the binary timed above;
# without it the "_O1" file would contain unoptimized code.
gcc -O1 test_float_mul.c -S -o test_float_mul_O1.s
#!/bin/bash
# Build test_float_mul.c with icc at -O0 and at -O1.
# For each build, time a run of 10000000 iterations with the regular operand
# and a run with the extra "small" argument, then dump the assembly.

# -f: do not complain when no previous binary exists (e.g. on the first run).
rm -f ./a_icc.out
icc -o a_icc.out -O0 test_float_mul.c
# Time the icc-built binary for both runs; timing ./a.out here would measure
# whatever binary a previous gcc build left behind.
time ./a_icc.out 10000000
time ./a_icc.out 10000000 small
icc test_float_mul.c -S -O0 -o icc_test_float_mul_O0.s

rm -f ./a_icc.out
icc -o a_icc.out -O1 test_float_mul.c
time ./a_icc.out 10000000
time ./a_icc.out 10000000 small
icc test_float_mul.c -S -O1 -o icc_test_float_mul_O1.s
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <math.h>
#include <stdlib.h>
//#include <xmmintrin.h>
//#define _MM_DENORMALS_ZERO_MASK 0x0040
//#define _MM_DENORMALS_ZERO_ON 0x0040
//#define _MM_DENORMALS_ZERO_OFF 0x0000
//
//#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
// _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
//#define _MM_GET_DENORMALS_ZERO_MODE() \
// (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
//_mm_setcsr( _mm_getcsr() | 0x8040 );
//_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
//#include <fenv.h>
//fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
#include <float.h>
/*
 * Accumulate the product a * b `count` times in single precision.
 *
 * The multiplication is deliberately kept inside the loop: it is the
 * operation this program is timing.
 *
 * Returns the accumulated sum; 0 when count is zero or negative.
 */
float foo(float a, float b, int count) {
    float acc = 0.;
    int remaining;

    for (remaining = count; remaining > 0; --remaining) {
        acc += a * b;
    }
    return acc;
}
/*
 * Print FLT_MIN, then the value and raw 32-bit pattern of the operand `a`,
 * of the product a * b, and of the sum returned by foo(a, b, count).
 *
 * argv[1] (optional): iteration count handed to foo(); defaults to 100000.
 * argv[2] (optional): when any second argument is present, `a` is scaled by
 *                     1e-38 first, which puts it below FLT_MIN.
 */
int main(int argc, char** argv) {
    const int count = (argc > 1) ? (int)atoi(argv[1]) : 100000;

    printf("Minimum float positive value: %E\n", FLT_MIN );

    float a = 0.40306925773620605;

    /* Lets the bit pattern of a float be read back as a 32-bit integer. */
    union {
        float f;
        uint32_t u;
    } bits;

    bits.f = a;
    printf("A: %lf, HEX: 0x%" PRIx32 "\n", a, bits.u);

    if (argc > 2) {
        /* Shrink the operand so the later products involve tiny values. */
        a *= 1e-38;
        bits.f = a;
    }
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a, bits.u);

    float b = 0.9346844673156738 * 1e-5;

    /* A single product: value and bit pattern. */
    bits.f = a * b;
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a * b, bits.u);

    /* The repeated multiply-accumulate that the scripts time. */
    float total = foo(a, b, count);
    bits.f = total;
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", total, bits.u);

    return 0;
}
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
def time_prelu(x, alpha, num=10):
    """Return the mean wall-clock seconds taken by one ``F.prelu(x, alpha)``.

    Args:
        x: input tensor passed to ``F.prelu``.
        alpha: PReLU weight tensor.
        num: number of timed repetitions; must be positive.

    Raises:
        ValueError: if ``num`` is not positive.
    """
    if num <= 0:
        raise ValueError("num must be positive")
    start = time.time()
    for _ in range(num):
        F.prelu(x, alpha)
    return (time.time() - start) / num


def main():
    """Time PReLU with a regular weight, then with the weight scaled by 1e-40."""
    torch.random.manual_seed(3)
    alpha = torch.rand(1)
    x = torch.randn((32, 256, 64, 64))
    # Not used by the timed loops; kept for the alternative experiment that
    # applies a convolution inside the loop (``x = m(x)``).
    m = nn.Conv2d(256, 256, 3, 1, padding=1)
    print(x[0][1])

    num = 10
    print(time_prelu(x, alpha, num))

    # Scale the weight to a very small magnitude (assuming the default float32
    # dtype, this is below the smallest normal float) and time the same op.
    alpha = alpha * 1e-40
    print(time_prelu(x, alpha, num))


# Guard the benchmark so importing this file does not run it.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment