samson-wang/result.log

## result.log
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738

real	0m0.057s
user	0m0.056s
sys	0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306921002e-39, HEX: 0x2be3ee
A: 3.7835058537e-44, HEX: 0x1b
A: 4.0263963917e-37, HEX: 0x30902dc

real	0m0.622s
user	0m0.620s
sys	0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738

real	0m0.025s
user	0m0.024s
sys	0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306921002e-39, HEX: 0x2be3ee
A: 3.7835058537e-44, HEX: 0x1b
A: 4.0263963917e-37, HEX: 0x30902dc

## result_icc.log
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738

real	0m0.015s
user	0m0.012s
sys	0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306921002e-39, HEX: 0x2be3ee
A: 3.7835058537e-44, HEX: 0x1b
A: 4.0263963917e-37, HEX: 0x30902dc

real	0m0.540s
user	0m0.540s
sys	0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 4.0306925774e-01, HEX: 0x3ece5f18
A: 3.7674255964e-06, HEX: 0x367cd3e1
A: 3.8132049561e+01, HEX: 0x42188738

real	0m0.018s
user	0m0.016s
sys	0m0.000s
Minimum float positive value: 1.175494E-38
A: 0.403069, HEX: 0x3ece5f18
A: 0.0000000000e+00, HEX: 0x0
A: 0.0000000000e+00, HEX: 0x0
A: 0.0000000000e+00, HEX: 0x0

real	0m0.016s
user	0m0.012s
sys	0m0.004s

## run.sh
#!/bin/bash
# Build test_float_mul.c with gcc at -O0 and at -O1. For each build, time a
# run with the default operand and a run with the extra "small" argument
# (which makes the program scale its operand by 1e-38), then save the
# assembly listing for that optimization level.

# -f: a missing binary (e.g. on a fresh checkout) is not an error.
rm -f ./a.out
gcc -O0 test_float_mul.c
time ./a.out 10000000
time ./a.out 10000000 small

gcc -O0 test_float_mul.c -S -o test_float_mul_O0.s

rm -f ./a.out
gcc -O1 test_float_mul.c
time ./a.out 10000000
time ./a.out 10000000 small

# The listing must be produced with the same -O1 flag as the binary timed
# above; without it gcc falls back to its default (-O0) and the "O1" file
# would just duplicate the O0 listing.
gcc -O1 test_float_mul.c -S -o test_float_mul_O1.s

## run_icc.sh
#!/bin/bash
# Build test_float_mul.c with icc at -O0 and at -O1. For each build, time a
# run with the default operand and a run with the extra "small" argument
# (which makes the program scale its operand by 1e-38), then save the
# assembly listing for that optimization level.

# -f: a missing binary (e.g. on a fresh checkout) is not an error.
rm -f ./a_icc.out
icc -o a_icc.out -O0 test_float_mul.c
# Time the icc-built binary; ./a.out is whatever gcc left behind, if anything.
time ./a_icc.out 10000000
time ./a_icc.out 10000000 small

icc test_float_mul.c -S -O0 -o icc_test_float_mul_O0.s

rm -f ./a_icc.out
icc -o a_icc.out -O1 test_float_mul.c
time ./a_icc.out 10000000
time ./a_icc.out 10000000 small

icc test_float_mul.c -S -O1 -o icc_test_float_mul_O1.s

## test_float_mul.c
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <math.h>
#include <stdlib.h>
//#include <xmmintrin.h>
//#define _MM_DENORMALS_ZERO_MASK   0x0040
//#define _MM_DENORMALS_ZERO_ON     0x0040
//#define _MM_DENORMALS_ZERO_OFF    0x0000
//
//#define _MM_SET_DENORMALS_ZERO_MODE(mode)                                   \
//            _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
//#define _MM_GET_DENORMALS_ZERO_MODE()                                       \
//            (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
//_mm_setcsr( _mm_getcsr() | 0x8040 );
//_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
//#include <fenv.h>
//fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
#include <float.h>

// Sum the product a * b `count` times in single precision and return the
// total. The product is recomputed on every iteration. A `count` of zero or
// less performs no iterations and returns 0.
float foo(float a, float b, int count) {
    float acc = 0.0f;
    for (int left = count; left > 0; --left) {
        acc += a * b;
    }
    return acc;
}

// Return the 32-bit pattern that stores `value`, read back through a union.
static uint32_t bits_of(float value) {
    union {
        float f;
        uint32_t u;
    } pun = { .f = value };
    return pun.u;
}

// Entry point. Prints FLT_MIN, then the operand `a` (twice: before and after
// the optional scaling), the product a * b, and the result of
// foo(a, b, count), each as a number and as its raw bit pattern.
//   argv[1] (optional): iteration count passed to foo(); defaults to 100000.
//   argv[2] (optional): if any second argument is present, `a` is scaled by
//                       1e-38 before it is used.
int main(int argc, char** argv) {

    const int count = (argc > 1) ? atoi(argv[1]) : 100000;
    printf("Minimum float positive value: %E\n", FLT_MIN );

    float a = 0.40306925773620605;
    printf("A: %lf, HEX: 0x%" PRIx32 "\n", a, bits_of(a));

    if (argc > 2)
        a = a * 1e-38;
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a, bits_of(a));

    const float b = 0.9346844673156738 * 1e-5;

    // The product is passed to printf as an expression, not via a float
    // temporary, so it is formatted exactly as written in the call.
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a * b, bits_of(a * b));

    const float total = foo(a, b, count);
    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", total, bits_of(total));

    return 0;
}

## test_prelu_pytorch.py
"""Time ``F.prelu`` with a random slope and with the same slope scaled by 1e-40.

Run as a script. It prints one slice of the input tensor, then the mean
seconds per ``F.prelu`` call for each of the two slopes.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

NUM = 10


def mean_call_time(fn, repeats=NUM):
    """Return the mean wall-clock seconds per call of ``fn``.

    Args:
        fn: Zero-argument callable to time; its return value is discarded.
        repeats: Number of back-to-back calls to average over.

    Raises:
        ValueError: If ``repeats`` is not positive.
    """
    if repeats <= 0:
        raise ValueError("repeats must be positive")
    start = time.perf_counter()
    for _ in range(repeats):
        fn()
    return (time.perf_counter() - start) / repeats


def main():
    """Run both timings and print the results."""
    torch.random.manual_seed(3)
    alpha = torch.rand(1)

    x = torch.randn((32, 256, 64, 64))

    # Alternative workload from the original script (it was only referenced
    # from a commented-out line); pass ``lambda: m(x)`` to mean_call_time to
    # time a convolution instead of PReLU.
    m = nn.Conv2d(256, 256, 3, 1, padding=1)  # noqa: F841
    print(x[0][1])

    print(mean_call_time(lambda: F.prelu(x, alpha)))

    # Scaling by 1e-40 makes the slope far smaller than before; with the
    # default float32 dtype this is presumably below the smallest normal
    # value (i.e. subnormal) -- confirm if the default dtype is changed.
    small_alpha = alpha * 1e-40
    print(mean_call_time(lambda: F.prelu(x, small_alpha)))


if __name__ == "__main__":
    main()
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306925774e-01, HEX: 0x3ece5f18
	A: 3.7674255964e-06, HEX: 0x367cd3e1
	A: 3.8132049561e+01, HEX: 0x42188738

	real 0m0.057s
	user 0m0.056s
	sys 0m0.000s
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306921002e-39, HEX: 0x2be3ee
	A: 3.7835058537e-44, HEX: 0x1b
	A: 4.0263963917e-37, HEX: 0x30902dc

	real 0m0.622s
	user 0m0.620s
	sys 0m0.000s
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306925774e-01, HEX: 0x3ece5f18
	A: 3.7674255964e-06, HEX: 0x367cd3e1
	A: 3.8132049561e+01, HEX: 0x42188738

	real 0m0.025s
	user 0m0.024s
	sys 0m0.000s
	Minimum float positive value: 1.175494E-38
	A: 0.403069, HEX: 0x3ece5f18
	A: 4.0306921002e-39, HEX: 0x2be3ee
	A: 3.7835058537e-44, HEX: 0x1b
	A: 4.0263963917e-37, HEX: 0x30902dc
	#!/bin/bash
	# Build test_float_mul.c with gcc at -O0 and at -O1. For each build, time a
	# run with the default operand and a run with the extra "small" argument
	# (which makes the program scale its operand by 1e-38), then save the
	# assembly listing for that optimization level.

	# -f: a missing binary (e.g. on a fresh checkout) is not an error.
	rm -f ./a.out
	gcc -O0 test_float_mul.c
	time ./a.out 10000000
	time ./a.out 10000000 small

	gcc -O0 test_float_mul.c -S -o test_float_mul_O0.s

	rm -f ./a.out
	gcc -O1 test_float_mul.c
	time ./a.out 10000000
	time ./a.out 10000000 small

	# The listing must be produced with the same -O1 flag as the binary timed
	# above; without it gcc falls back to its default (-O0) and the "O1" file
	# would just duplicate the O0 listing.
	gcc -O1 test_float_mul.c -S -o test_float_mul_O1.s
	#!/bin/bash
	# Build test_float_mul.c with icc at -O0 and at -O1. For each build, time a
	# run with the default operand and a run with the extra "small" argument
	# (which makes the program scale its operand by 1e-38), then save the
	# assembly listing for that optimization level.

	# -f: a missing binary (e.g. on a fresh checkout) is not an error.
	rm -f ./a_icc.out
	icc -o a_icc.out -O0 test_float_mul.c
	# Time the icc-built binary; ./a.out is whatever gcc left behind, if anything.
	time ./a_icc.out 10000000
	time ./a_icc.out 10000000 small

	icc test_float_mul.c -S -O0 -o icc_test_float_mul_O0.s

	rm -f ./a_icc.out
	icc -o a_icc.out -O1 test_float_mul.c
	time ./a_icc.out 10000000
	time ./a_icc.out 10000000 small

	icc test_float_mul.c -S -O1 -o icc_test_float_mul_O1.s
	#include <stdio.h>
	#include <stdint.h>
	#include <inttypes.h>
	#include <math.h>
	#include <stdlib.h>
	//#include <xmmintrin.h>
	//#define _MM_DENORMALS_ZERO_MASK 0x0040
	//#define _MM_DENORMALS_ZERO_ON 0x0040
	//#define _MM_DENORMALS_ZERO_OFF 0x0000
	//
	//#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
	// _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) \| (mode))
	//#define _MM_GET_DENORMALS_ZERO_MODE() \
	// (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
	//_mm_setcsr( _mm_getcsr() \| 0x8040 );
	//_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
	//#include <fenv.h>
	//fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
	#include <float.h>

	// Sum the product a * b `count` times in single precision and return the
	// total. The product is recomputed on every iteration. A `count` of zero or
	// less performs no iterations and returns 0.
	float foo(float a, float b, int count) {
	    float acc = 0.0f;
	    for (int left = count; left > 0; --left) {
	        acc += a * b;
	    }
	    return acc;
	}

	// Return the 32-bit pattern that stores `value`, read back through a union.
	static uint32_t bits_of(float value) {
	    union {
	        float f;
	        uint32_t u;
	    } pun = { .f = value };
	    return pun.u;
	}

	// Entry point. Prints FLT_MIN, then the operand `a` (twice: before and after
	// the optional scaling), the product a * b, and the result of
	// foo(a, b, count), each as a number and as its raw bit pattern.
	//   argv[1] (optional): iteration count passed to foo(); defaults to 100000.
	//   argv[2] (optional): if any second argument is present, `a` is scaled by
	//                       1e-38 before it is used.
	int main(int argc, char** argv) {

	    const int count = (argc > 1) ? atoi(argv[1]) : 100000;
	    printf("Minimum float positive value: %E\n", FLT_MIN );

	    float a = 0.40306925773620605;
	    printf("A: %lf, HEX: 0x%" PRIx32 "\n", a, bits_of(a));

	    if (argc > 2)
	        a = a * 1e-38;
	    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a, bits_of(a));

	    const float b = 0.9346844673156738 * 1e-5;

	    // The product is passed to printf as an expression, not via a float
	    // temporary, so it is formatted exactly as written in the call.
	    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", a * b, bits_of(a * b));

	    const float total = foo(a, b, count);
	    printf("A: %.10e, HEX: 0x%" PRIx32 "\n", total, bits_of(total));

	    return 0;
	}
	"""Time ``F.prelu`` with a random slope and with the same slope scaled by 1e-40.

	Run as a script. It prints one slice of the input tensor, then the mean
	seconds per ``F.prelu`` call for each of the two slopes.
	"""
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import time

	NUM = 10


	def mean_call_time(fn, repeats=NUM):
	    """Return the mean wall-clock seconds per call of ``fn``.

	    Args:
	        fn: Zero-argument callable to time; its return value is discarded.
	        repeats: Number of back-to-back calls to average over.

	    Raises:
	        ValueError: If ``repeats`` is not positive.
	    """
	    if repeats <= 0:
	        raise ValueError("repeats must be positive")
	    start = time.perf_counter()
	    for _ in range(repeats):
	        fn()
	    return (time.perf_counter() - start) / repeats


	def main():
	    """Run both timings and print the results."""
	    torch.random.manual_seed(3)
	    alpha = torch.rand(1)

	    x = torch.randn((32, 256, 64, 64))

	    # Alternative workload from the original script (it was only referenced
	    # from a commented-out line); pass ``lambda: m(x)`` to mean_call_time to
	    # time a convolution instead of PReLU.
	    m = nn.Conv2d(256, 256, 3, 1, padding=1)  # noqa: F841
	    print(x[0][1])

	    print(mean_call_time(lambda: F.prelu(x, alpha)))

	    # Scaling by 1e-40 makes the slope far smaller than before; with the
	    # default float32 dtype this is presumably below the smallest normal
	    # value (i.e. subnormal) -- confirm if the default dtype is changed.
	    small_alpha = alpha * 1e-40
	    print(mean_call_time(lambda: F.prelu(x, small_alpha)))


	if __name__ == "__main__":
	    main()