Skip to content

Instantly share code, notes, and snippets.

@miyo
Last active December 8, 2019 09:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miyo/e8200ce82e1732a9cd4b2f21764f3b20 to your computer and use it in GitHub Desktop.
Save miyo/e8200ce82e1732a9cd4b2f21764f3b20 to your computer and use it in GitHub Desktop.
gccとg++でcomplex.hを使った場合の速度を調べてみた

CプログラムってC++コンパイラ(g++)でもコンパイルできるよね,その場合,生成されるコードってどのくらい違うのだろう?実行時間は?という疑問に対して,複素数を扱うプログラムの場合で調べてみた結果. 関連して,C++のcomplexを使う場合についての実行時間も調べてみた.

結果,CプログラムをC++コンパイラ(g++)でコンパイルした場合, 関数名が違うくらいの,ほぼ同じコードが生成された. 一方で,なぜかg++で生成したバイナリの実行時間は gccで生成したバイナリの実行時間より長くなる傾向が見られた(なぜ?)

なお,デスクトップLinux環境上で適当に測定してるので,実行時間にゆらぎはある.

  • 01_env - 使用環境
  • complex_c_func.c - 評価対象のカーネル
  • complex_c.c - 評価するためのガワ(complex_c_func.cをincludeしてる)
  • complex_c_func_gcc_g++.diff - gccとg++で生成されるアセンブラコードの比較
  • complex_cpp.cpp - C++のcomplexを使用する場合
  • diff.sh - complex_c_func.cをgccとg++でコンパイルして生成されるアセンブラを比較する
  • test.sh - complex_c.cをgccとg++で,complex_cpp.cppをg++でコンパイルして実行時間を比較
  • elapsed_time_comparison.txt - test.shの実行結果
  • test2.sh - test.shに対して実行順序を変更
  • gistfile1.txt - test2.shの実行結果
  • test3.sh - test.shに対して実行順序を変更
  • gistfile2.txt - test3.shの実行結果
Intel(R) Core(TM) i3-4160 CPU @ 3.60GHz
MemTotal: 16279924 kB
OS: Ubuntu 18.04.3 LTS
compiler: gcc (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0, g++ (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
#include <stdio.h>
#include <stdlib.h>
#include <complex.h>
#include <sys/time.h>
/*
 * Return the current time reported by gettimeofday() as a single double,
 * expressed in seconds (whole seconds plus the microsecond part scaled to a
 * fraction of a second).
 */
double time_in_sec(){
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = (double)now.tv_sec;
    const double microseconds = (double)now.tv_usec;
    return whole_seconds + microseconds / 1000000.0;
}
#include "complex_c_func.c"
double _Complex dst;
/*
 * Benchmark driver for func().
 *
 * Usage: <prog> nums repetitions [seed]
 *   nums        - number of complex elements in each input array (> 0)
 *   repetitions - number of timed calls to func() (> 0)
 *   seed        - optional value passed to srand()
 *
 * Every repetition allocates two arrays, fills them with fresh pseudo-random
 * values, and times a single call to func().  The average, maximum and
 * minimum elapsed times are printed together with the last result in dst.
 *
 * Returns 0 on success (and after printing the usage line), 1 when the
 * arguments are not positive or an allocation fails.
 *
 * This file is built with both gcc and g++, so it stays within the common
 * C/C++ subset (e.g. the malloc() results are cast explicitly).
 */
int main(int argc, char **argv){
    if(argc < 3){
        printf("usage %s nums repetitions\n", argv[0]);
        return 0;
    }
    int n = atoi(argv[1]);
    int m = atoi(argv[2]);
    /* A non-positive n would turn into a huge size_t in malloc(), and m == 0
     * would make the average a 0/0 division. */
    if(n <= 0 || m <= 0){
        fprintf(stderr, "%s: nums and repetitions must be positive integers\n", argv[0]);
        return 1;
    }
    if(argc >= 4){
        srand(atoi(argv[3]));
    }

    /* The complex offset added to every sample does not depend on the loop
     * indices, so compute it once instead of once per element. */
    const double _Complex offset = cexp(I*3.14);

    double min = 0;
    double max = 0;
    double t = 0;
    for(int j = 0; j < m; j++){
        double _Complex* src0 = (double _Complex*)malloc(sizeof(double _Complex) * (size_t)n);
        double _Complex* src1 = (double _Complex*)malloc(sizeof(double _Complex) * (size_t)n);
        if(src0 == NULL || src1 == NULL){
            fprintf(stderr, "%s: failed to allocate %d elements\n", argv[0], n);
            free(src0);
            free(src1);
            return 1;
        }

        /* rand() call order (src0 first, then src1) is kept so a given seed
         * produces the same input data as before. */
        for(int i = 0; i < n; i++){
            double r;
            r = (double)rand()/RAND_MAX;
            src0[i] = r + offset;
            r = (double)rand()/RAND_MAX;
            src1[i] = r + offset;
        }

        double t0 = time_in_sec();
        func(src0, src1, &dst, n);
        double t1 = time_in_sec();

        /* Release the buffers only after t1 has been taken so that the cost
         * of free() is not attributed to func(). */
        free(src0);
        free(src1);

        double tt = t1 - t0;
        t += tt;
        /* Seed min/max from the first repetition rather than using 0 as an
         * "unset" marker, which would misbehave if an elapsed time is 0. */
        if(j == 0 || tt < min){
            min = tt;
        }
        if(j == 0 || tt > max){
            max = tt;
        }
#ifdef VERBOSE
        printf("%f + %f * i\n", creal(dst), cimag(dst));
        printf("%f\n", tt);
#endif
    }
    printf("%s: n=%d, avg=%f, max=%f, min=%f, (%g,%g)\n", argv[0], n, (t/m), max, min, creal(dst), cimag(dst));
    return 0;
}
/*
 * Accumulate the element-wise products of two complex arrays.
 *
 * src0, src1 - input arrays, each holding at least n elements
 * dst        - receives the sum of src0[i] * src1[i] for i in [0, n)
 *              (no conjugation is applied); set to 0 when n <= 0
 * n          - number of elements to process
 *
 * When VERBOSE is defined, each pair of operands is printed before it is
 * multiplied.
 */
void func(double _Complex* src0, double _Complex* src1, double _Complex *dst, int n){
    double _Complex sum = 0;
    for(int i = 0; i < n; i++){
#ifdef VERBOSE
        /* The first operand's imaginary part comes from src0, not src1. */
        printf("(%f+%fi) * (%f+%fi)\n", creal(src0[i]), cimag(src0[i]), creal(src1[i]), cimag(src1[i]));
#endif
        sum += src0[i] * src1[i];
    }
    *dst = sum;
}
4,6c4,6
< .globl func
< .type func, @function
< func:
---
> .globl _Z4funcPCdS0_S0_i
> .type _Z4funcPCdS0_S0_i, @function
> _Z4funcPCdS0_S0_i:
75c75
< .size func, .-func
---
> .size _Z4funcPCdS0_S0_i, .-_Z4funcPCdS0_S0_i
#include <stdlib.h>
#include <sys/time.h>

#include <cmath>
#include <complex>
#include <iomanip>
#include <iostream>
#include <vector>
/// Returns the current time reported by gettimeofday() as seconds in a
/// double: the whole-second field plus the microsecond field scaled down
/// to a fraction of a second.
double time_in_sec(){
    timeval now;
    gettimeofday(&now, nullptr);

    const auto whole_seconds = static_cast<double>(now.tv_sec);
    const auto microseconds = static_cast<double>(now.tv_usec);
    return whole_seconds + microseconds / 1000000.0;
}
/// Accumulates the element-wise products of two complex arrays.
///
/// @param src0 first input array, at least @p n elements
/// @param src1 second input array, at least @p n elements
/// @param dst  receives the sum of src0[i] * src1[i] for i in [0, n)
///             (no conjugation is applied); set to 0 when n <= 0
/// @param n    number of elements to process
///
/// When VERBOSE is defined, each operand pair is printed before it is
/// multiplied.
void func(std::complex<double>* src0, std::complex<double>* src1, std::complex<double>& dst, int n){
    std::complex<double> sum = 0;
    for(int i = 0; i < n; i++){
#ifdef VERBOSE
        // Must be `std::cout`; a single colon turns `std` into a label and
        // leaves `cout` undeclared, breaking the VERBOSE build.
        std::cout << src0[i] << " * " << src1[i] << std::endl;
#endif
        sum += src0[i] * src1[i];
    }
    dst = sum;
}
std::complex<double> dst;
/// Benchmark driver for func().
///
/// Usage: <prog> nums repetitions [seed]
///   nums        - number of complex elements in each input array (> 0)
///   repetitions - number of timed calls to func() (> 0)
///   seed        - optional value passed to srand()
///
/// Every repetition fills two freshly allocated arrays with pseudo-random
/// values and times a single call to func().  The average, maximum and
/// minimum elapsed times are printed together with the last result in dst.
///
/// @return 0 on success (and after printing the usage line), 1 when the
///         arguments are not positive.
int main(int argc, char **argv)
{
    if(argc < 3){
        // Written through iostream: this file does not include <cstdio>,
        // so printf was only available through a transitive include.
        std::cout << "usage " << argv[0] << " nums repetitions\n";
        return 0;
    }
    const int n = atoi(argv[1]);
    const int m = atoi(argv[2]);
    // A non-positive n cannot size the arrays, and m == 0 would make the
    // average a 0/0 division.
    if(n <= 0 || m <= 0){
        std::cerr << argv[0] << ": nums and repetitions must be positive integers\n";
        return 1;
    }
    if(argc >= 4){
        srand(atoi(argv[3]));
    }

    using namespace std::complex_literals;
    // The complex offset added to every sample is loop-invariant, so it is
    // computed once instead of once per element.
    const std::complex<double> offset = std::exp(1i*3.14);

    double min = 0;
    double max = 0;
    double t = 0;
    for(int j = 0; j < m; j++){
        // The vectors own the buffers and release them at the end of each
        // repetition, i.e. outside the timed region below.
        std::vector<std::complex<double>> src0(n);
        std::vector<std::complex<double>> src1(n);

        // rand() call order (src0 first, then src1) is kept so a given seed
        // produces the same input data as before.
        for(int k = 0; k < n; k++){
            double r;
            r = static_cast<double>(rand())/RAND_MAX;
            src0[k] = r + offset;
            r = static_cast<double>(rand())/RAND_MAX;
            src1[k] = r + offset;
        }

        const double t0 = time_in_sec();
        func(src0.data(), src1.data(), dst, n);
        const double t1 = time_in_sec();
        const double tt = t1 - t0;

        // Seed min/max from the first repetition rather than using 0 as an
        // "unset" marker, which would misbehave if an elapsed time is 0.
        if(j == 0 || tt < min){
            min = tt;
        }
        if(j == 0 || tt > max){
            max = tt;
        }
        t += tt;
#ifdef VERBOSE
        std::cout << "" << dst << '\n';
        std::cout << "" << tt << '\n';
#endif
    }
    std::cout << argv[0] << ": "
              << "n=" << n
              << ", avg=" << (t/m)
              << ", max=" << max
              << ", min=" << min
              << ", " << dst << std::endl;
    return 0;
}
# Compile the kernel to assembly with each compiler, keep every listing under
# a compiler-specific name, then store the difference between the two.
for compiler in gcc g++
do
    "$compiler" -O3 -S complex_c_func.c
    mv complex_c_func.s "complex_c_func_${compiler}.s"
done
diff complex_c_func_gcc.s complex_c_func_g++.s > complex_c_func_gcc_g++.diff
miyo@grappa:% sh test.sh
./complex_c_gcc: n=500000, avg=0.001950, max=0.002074, min=0.001921, (124783,-795.622)
./complex_c_gcc: n=1000000, avg=0.003830, max=0.004015, min=0.003797, (249785,-1592.31)
./complex_c_gcc: n=5000000, avg=0.020895, max=0.021260, min=0.020601, (1.25049e+06,-7963.74)
./complex_c_gcc: n=10000000, avg=0.041484, max=0.042116, min=0.041181, (2.50075e+06,-15928.6)
./complex_c_gcc: n=50000000, avg=0.192739, max=0.193341, min=0.192106, (1.24998e+07,-79630.9)
./complex_c_gcc: n=100000000, avg=0.376987, max=0.377422, min=0.376400, (2.50008e+07,-159265)
./complex_c_gcc: n=500000000, avg=1.856979, max=1.882624, min=1.849500, (1.25001e+08,-796346)
./complex_c_g++: n=500000, avg=0.002454, max=0.002691, min=0.002411, (124783,-795.622)
./complex_c_g++: n=1000000, avg=0.004929, max=0.005160, min=0.004794, (249785,-1592.31)
./complex_c_g++: n=5000000, avg=0.025850, max=0.025954, min=0.025231, (1.25049e+06,-7963.74)
./complex_c_g++: n=10000000, avg=0.049112, max=0.049733, min=0.048610, (2.50075e+06,-15928.6)
./complex_c_g++: n=50000000, avg=0.231742, max=0.234580, min=0.231187, (1.24998e+07,-79630.9)
./complex_c_g++: n=100000000, avg=0.453577, max=0.455913, min=0.453133, (2.50008e+07,-159265)
./complex_c_g++: n=500000000, avg=2.234970, max=2.247742, min=2.229900, (1.25001e+08,-796346)
./complex_c++_g++: n=500000, avg=0.00175731, max=0.00213313, min=0.00170398, (124783,-795.622)
./complex_c++_g++: n=1000000, avg=0.00348706, max=0.00371003, min=0.00343704, (249785,-1592.31)
./complex_c++_g++: n=5000000, avg=0.0200314, max=0.0203891, min=0.01916, (1.25049e+06,-7963.74)
./complex_c++_g++: n=10000000, avg=0.0376434, max=0.0377338, min=0.0375719, (2.50075e+06,-15928.6)
./complex_c++_g++: n=50000000, avg=0.18886, max=0.189569, min=0.188623, (1.24998e+07,-79630.9)
./complex_c++_g++: n=100000000, avg=0.379331, max=0.379741, min=0.37898, (2.50008e+07,-159265)
./complex_c++_g++: n=500000000, avg=1.88151, max=1.88634, min=1.87581, (1.25001e+08,-796346)
miyo@grappa:% sh test2.sh
./complex_c++_g++: n=500000, avg=0.00173452, max=0.00189805, min=0.00170898, (124783,-795.622)
./complex_c_g++: n=500000, avg=0.002419, max=0.002482, min=0.002402, (124783,-795.622)
./complex_c_gcc: n=500000, avg=0.002082, max=0.002104, min=0.001978, (124783,-795.622)
./complex_c++_g++: n=1000000, avg=0.00348315, max=0.00380111, min=0.00342607, (249785,-1592.31)
./complex_c_g++: n=1000000, avg=0.004700, max=0.004824, min=0.004675, (249785,-1592.31)
./complex_c_gcc: n=1000000, avg=0.004192, max=0.004372, min=0.004151, (249785,-1592.31)
./complex_c++_g++: n=5000000, avg=0.0193249, max=0.0195231, min=0.0191698, (1.25049e+06,-7963.74)
./complex_c_g++: n=5000000, avg=0.024368, max=0.024399, min=0.024339, (1.25049e+06,-7963.74)
./complex_c_gcc: n=5000000, avg=0.020679, max=0.020736, min=0.020643, (1.25049e+06,-7963.74)
./complex_c++_g++: n=10000000, avg=0.0375857, max=0.0376141, min=0.0375359, (2.50075e+06,-15928.6)
./complex_c_g++: n=10000000, avg=0.048546, max=0.048622, min=0.048522, (2.50075e+06,-15928.6)
./complex_c_gcc: n=10000000, avg=0.041034, max=0.041125, min=0.041005, (2.50075e+06,-15928.6)
./complex_c++_g++: n=50000000, avg=0.189053, max=0.189582, min=0.188828, (1.24998e+07,-79630.9)
./complex_c_g++: n=50000000, avg=0.231149, max=0.231402, min=0.231043, (1.24998e+07,-79630.9)
./complex_c_gcc: n=50000000, avg=0.193406, max=0.193582, min=0.193232, (1.24998e+07,-79630.9)
./complex_c++_g++: n=100000000, avg=0.377249, max=0.380063, min=0.375594, (2.50008e+07,-159265)
./complex_c_g++: n=100000000, avg=0.454100, max=0.455543, min=0.453699, (2.50008e+07,-159265)
./complex_c_gcc: n=100000000, avg=0.378095, max=0.379139, min=0.377263, (2.50008e+07,-159265)
./complex_c++_g++: n=500000000, avg=1.88809, max=1.95081, min=1.8729, (1.25001e+08,-796346)
./complex_c_g++: n=500000000, avg=2.254588, max=2.269981, min=2.238011, (1.25001e+08,-796346)
./complex_c_gcc: n=500000000, avg=1.854065, max=1.858170, min=1.846826, (1.25001e+08,-796346)
miyo@grappa:% sh test3.sh
./complex_c_g++: n=500000, avg=0.002481, max=0.002888, min=0.002422, (124783,-795.622)
./complex_c++_g++: n=500000, avg=0.0017477, max=0.00199914, min=0.00170898, (124783,-795.622)
./complex_c_gcc: n=500000, avg=0.002112, max=0.002212, min=0.002078, (124783,-795.622)
./complex_c_g++: n=1000000, avg=0.005018, max=0.005115, min=0.004970, (249785,-1592.31)
./complex_c++_g++: n=1000000, avg=0.00345421, max=0.00377107, min=0.00340796, (249785,-1592.31)
./complex_c_gcc: n=1000000, avg=0.004191, max=0.004651, min=0.003915, (249785,-1592.31)
./complex_c_g++: n=5000000, avg=0.024869, max=0.025046, min=0.024569, (1.25049e+06,-7963.74)
./complex_c++_g++: n=5000000, avg=0.0197668, max=0.0200329, min=0.019223, (1.25049e+06,-7963.74)
./complex_c_gcc: n=5000000, avg=0.020821, max=0.020868, min=0.020795, (1.25049e+06,-7963.74)
./complex_c_g++: n=10000000, avg=0.048805, max=0.049189, min=0.048685, (2.50075e+06,-15928.6)
./complex_c++_g++: n=10000000, avg=0.0376932, max=0.037771, min=0.0376091, (2.50075e+06,-15928.6)
./complex_c_gcc: n=10000000, avg=0.041294, max=0.041545, min=0.041143, (2.50075e+06,-15928.6)
./complex_c_g++: n=50000000, avg=0.232907, max=0.233014, min=0.232796, (1.24998e+07,-79630.9)
./complex_c++_g++: n=50000000, avg=0.188717, max=0.188896, min=0.188546, (1.24998e+07,-79630.9)
./complex_c_gcc: n=50000000, avg=0.194860, max=0.194969, min=0.194785, (1.24998e+07,-79630.9)
./complex_c_g++: n=100000000, avg=0.455017, max=0.455631, min=0.453399, (2.50008e+07,-159265)
./complex_c++_g++: n=100000000, avg=0.377277, max=0.379055, min=0.376125, (2.50008e+07,-159265)
./complex_c_gcc: n=100000000, avg=0.377480, max=0.378683, min=0.376184, (2.50008e+07,-159265)
./complex_c_g++: n=500000000, avg=2.236948, max=2.256663, min=2.227983, (1.25001e+08,-796346)
./complex_c++_g++: n=500000000, avg=1.91338, max=1.94352, min=1.87593, (1.25001e+08,-796346)
./complex_c_gcc: n=500000000, avg=1.853870, max=1.856689, min=1.851566, (1.25001e+08,-796346)
#!/bin/sh
# Build the three benchmark binaries one after another and run each of them
# over the same list of problem sizes, 10 repetitions per size.

sizes="500000 1000000 5000000 10000000 50000000 100000000 500000000"

# Run the benchmark binary given as $1 once for every problem size.
run_all() {
    for i in $sizes
    do
        "$1" $i 10
    done
}

# C source compiled as C.
gcc -O3 -o complex_c_gcc complex_c.c
run_all ./complex_c_gcc

# The same C source compiled as C++.
g++ -O3 -o complex_c_g++ complex_c.c
run_all ./complex_c_g++

# C++ std::complex implementation.
g++ -O3 -std=c++14 -o complex_c++_g++ complex_cpp.cpp
run_all ./complex_c++_g++
#!/bin/sh
# Run the three benchmark binaries interleaved per problem size, in the
# order: C++ std::complex, C built with g++, C built with gcc.
#
# All three binaries are built here so the script works on its own instead
# of relying on executables left behind by an earlier run of another script.
gcc -O3 -o complex_c_gcc complex_c.c
g++ -O3 -o complex_c_g++ complex_c.c
g++ -O3 -std=c++14 -o complex_c++_g++ complex_cpp.cpp
for i in 500000 1000000 5000000 10000000 50000000 100000000 500000000
do
    ./complex_c++_g++ $i 10
    ./complex_c_g++ $i 10
    ./complex_c_gcc $i 10
done
#!/bin/sh
# Run the three benchmark binaries interleaved per problem size, in the
# order: C built with g++, C++ std::complex, C built with gcc.
#
# All three binaries are built here so the script works on its own instead
# of relying on executables left behind by an earlier run of another script.
gcc -O3 -o complex_c_gcc complex_c.c
g++ -O3 -o complex_c_g++ complex_c.c
g++ -O3 -std=c++14 -o complex_c++_g++ complex_cpp.cpp
for i in 500000 1000000 5000000 10000000 50000000 100000000 500000000
do
    ./complex_c_g++ $i 10
    ./complex_c++_g++ $i 10
    ./complex_c_gcc $i 10
done
@miyo
Copy link
Author

miyo commented Dec 8, 2019

@miyo
Copy link
Author

miyo commented Dec 8, 2019

test2.shのような順序で実行した場合
test2

test3.shのような順序で実行した場合
test3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment