Skip to content

Instantly share code, notes, and snippets.

@foxtran
Created January 16, 2023 10:39
Show Gist options
  • Save foxtran/9c37ff3cbbf36e6eb8c05d77b79bf6ef to your computer and use it in GitHub Desktop.

Benchmark was compiled using the following compiler:

GCC version 12.2.0

Benchmark was compiled with the following options:

-mabi=lp64d -mcpu=sifive-s76 -misa-spec=20191213 -march=rv64ifd_zicsr -O3 -ffree-line-length-none -fpre-include=/usr/include/finclude/riscv64-linux-gnu/math-vector-fortran.h

Number of repeats is: 100000

Total size of one array is 4 KB

type test N mean_time_ms sd_time_ms min_time_ms max_time_ms
int8 add_v1 10 154.180 0.300 153.881 154.660
int8 add_v2 10 229.545 0.163 229.407 229.997
int8 add_v3 10 172.394 0.097 172.312 172.666
int8 mul_v1 10 21784.554 38.263 21734.102 21871.497
int8 mul_v2 10 14667.782 48.442 14611.107 14754.471
int8 mul_v3 10 1368.397 0.940 1367.566 1370.980
int8 fma_v1 10 59139.953 173.962 58692.840 59341.252
int8 fma_v2 10 34228.027 174.267 33799.763 34464.290
int8 fma_v3 10 38283.956 15.322 38266.248 38312.814
int8 fma_v4 10 27211.693 62.985 27111.599 27315.021
int8 div_v1 10 10832.464 16.944 10816.697 10875.626
int8 div_v2 10 33054.466 196.542 32747.107 33386.804
int8 inv_v1 10 1433.560 1.073 1432.488 1436.591
int8 inv_v2.1 10 1026.624 0.411 1026.147 1027.645
int8 inv_v2.2 10 5486.300 80.655 5368.874 5626.004
int8 inv_v2.3 10 1028.463 1.479 1026.583 1030.809
int8 popcnt 10 11288.803 4.854 11280.747 11298.194
int8 poppar 10 10085.949 252.318 9944.289 10593.157
int8 dim 10 1711.075 0.363 1710.501 1711.429
int8 iand 10 129.294 0.022 129.258 129.318
int8 ieor 10 171.878 0.041 171.817 171.946
int8 ior 10 171.806 0.064 171.735 171.955
int8 ishft 10 2680.281 0.837 2679.216 2682.155
int8 ishftc 10 4180.838 0.960 4179.290 4182.300
int8 ibset 10 1368.597 0.296 1368.128 1368.934
int8 ibclr 10 1367.733 0.198 1367.398 1367.945
int8 min 10 1716.899 1.226 1715.816 1720.364
int8 max 10 2258.198 64.990 2186.471 2330.996
int8 shifta 10 1029.033 4.225 1027.155 1041.684
int8 shiftl 10 1367.765 0.870 1366.904 1370.254
int8 shiftr 10 1368.840 0.424 1368.286 1369.756
int16 add_v1 10 154.014 0.078 153.927 154.205
int16 add_v2 10 222.490 0.292 222.296 223.317
int16 add_v3 10 172.730 0.574 172.486 174.447
int16 mul_v1 10 20416.120 218.875 19886.135 20559.396
int16 mul_v2 10 8787.868 38.870 8729.395 8864.329
int16 mul_v3 10 685.086 0.574 684.680 686.224
int16 fma_v1 10 39151.822 1421.016 35947.430 40054.226
int16 fma_v2 10 35041.264 494.932 33750.634 35331.821
int16 fma_v3 10 35527.173 450.874 34337.974 35763.367
int16 fma_v4 10 25710.934 118.172 25361.976 25797.383
int16 div_v1 10 5228.092 11.409 5214.855 5254.920
int16 div_v2 10 30558.839 135.868 30291.430 30765.738
int16 inv_v1 10 1028.196 0.979 1026.580 1029.260
int16 inv_v2.1 10 684.701 0.099 684.558 684.851
int16 inv_v2.2 10 2433.151 2.585 2430.446 2439.136
int16 inv_v2.3 10 515.460 1.471 513.831 518.084
int16 popcnt 10 5645.661 4.993 5641.015 5659.381
int16 poppar 10 5006.406 57.744 4971.694 5127.930
int16 dim 10 856.608 0.533 855.951 857.334
int16 iand 10 129.305 0.044 129.249 129.387
int16 ieor 10 171.889 0.077 171.805 172.071
int16 ior 10 171.793 0.065 171.724 171.958
int16 ishft 10 1370.018 1.652 1368.724 1374.678
int16 ishftc 10 2452.441 1.679 2450.849 2456.941
int16 ibset 10 684.705 0.227 684.549 685.358
int16 ibclr 10 684.868 0.356 684.468 685.659
int16 min 10 855.312 0.160 855.068 855.548
int16 max 10 1029.088 2.061 1027.188 1034.498
int16 shifta 10 845.499 4.355 840.652 853.527
int16 shiftl 10 684.619 0.155 684.414 684.959
int16 shiftr 10 855.637 0.504 855.215 856.793
int32 add_v1 10 171.487 0.095 171.397 171.726
int32 add_v2 10 257.512 0.052 257.434 257.573
int32 add_v3 10 176.485 4.704 172.220 187.931
int32 mul_v1 10 19665.968 88.318 19402.294 19713.062
int32 mul_v2 10 4889.738 15.492 4865.968 4908.887
int32 mul_v3 10 342.920 0.185 342.774 343.440
int32 fma_v1 10 29620.239 37.181 29567.077 29682.049
int32 fma_v2 10 29602.081 69.414 29501.666 29712.702
int32 fma_v3 10 29973.354 47.636 29916.607 30067.979
int32 fma_v4 10 23563.276 70.767 23369.407 23642.274
int32 div_v1 10 2572.045 4.310 2568.182 2582.294
int32 div_v2 10 28975.248 177.673 28650.921 29231.658
int32 inv_v1 10 343.059 0.078 342.940 343.159
int32 inv_v2.1 10 342.864 0.086 342.770 343.085
int32 inv_v2.2 10 1214.715 19.817 1203.581 1270.983
int32 inv_v2.3 10 260.002 1.291 257.904 262.621
int32 popcnt 10 2823.052 1.172 2821.368 2825.376
int32 poppar 10 2502.038 29.327 2484.688 2560.693
int32 dim 10 428.697 0.155 428.531 429.090
int32 iand 10 171.781 0.042 171.725 171.858
int32 ieor 10 171.870 0.043 171.812 171.951
int32 ior 10 129.301 0.031 129.260 129.368
int32 ishft 10 605.080 3.075 602.509 613.790
int32 ishftc 10 713.662 0.229 713.367 714.135
int32 ibset 10 342.976 0.278 342.761 343.651
int32 ibclr 10 343.067 0.293 342.777 343.720
int32 min 10 343.013 0.132 342.855 343.247
int32 max 10 514.732 0.652 513.688 515.690
int32 shifta 10 257.892 0.170 257.729 258.334
int32 shiftl 10 342.991 0.204 342.776 343.481
int32 shiftr 10 343.016 0.102 342.890 343.236
int64 add_v1 10 85.987 0.032 85.950 86.045
int64 add_v2 10 171.857 0.028 171.814 171.885
int64 add_v3 10 86.974 0.022 86.927 87.001
int64 mul_v1 10 9339.254 91.551 9188.585 9493.617
int64 mul_v2 10 2599.003 52.522 2495.918 2647.857
int64 mul_v3 10 171.933 0.032 171.881 171.972
int64 fma_v1 10 14930.031 504.423 13896.638 15331.385
int64 fma_v2 10 11842.597 320.479 11136.800 12145.558
int64 fma_v3 10 13188.739 473.490 12038.066 13594.269
int64 fma_v4 10 5799.165 172.533 5575.782 6156.902
int64 div_v1 10 1264.488 3.493 1261.761 1273.484
int64 div_v2 10 13626.730 123.792 13407.057 13773.680
int64 inv_v1 10 156.292 9.967 140.088 171.801
int64 inv_v2.1 10 172.093 0.026 172.053 172.136
int64 inv_v2.2 10 615.023 4.717 611.736 627.004
int64 inv_v2.3 10 130.578 1.333 129.309 132.781
int64 popcnt 10 1456.521 1.203 1454.246 1457.977
int64 poppar 10 1243.339 0.760 1241.899 1244.349
int64 dim 10 258.911 1.695 257.504 263.256
int64 iand 10 129.290 0.025 129.254 129.319
int64 ieor 10 171.938 0.027 171.894 171.963
int64 ior 10 172.271 0.627 171.816 173.239
int64 ishft 10 306.686 6.149 300.306 317.584
int64 ishftc 10 300.834 0.156 300.674 301.261
int64 ibset 10 171.933 0.037 171.874 171.973
int64 ibclr 10 214.578 0.047 214.509 214.631
int64 min 10 172.114 0.040 172.055 172.171
int64 max 10 170.704 0.867 169.183 171.894
int64 shifta 10 129.574 0.029 129.535 129.628
int64 shiftl 10 171.972 0.078 171.895 172.178
int64 shiftr 10 171.979 0.049 171.914 172.093
real32 add_v1 10 171.528 0.038 171.474 171.582
real32 add_v2 10 257.491 0.058 257.398 257.561
real32 add_v3 10 173.470 0.922 172.309 174.596
real32 mul_v1 10 128.592 0.045 128.541 128.705
real32 mul_v2 10 342.789 0.062 342.694 342.857
real32 mul_v3 10 173.842 1.923 172.389 177.169
real32 fma_v1 10 87.877 1.096 86.416 89.492
real32 fma_v2 10 128.803 0.042 128.748 128.875
real32 fma_v3 10 171.743 0.044 171.676 171.795
real32 fma_v4 10 257.165 0.047 257.088 257.217
real32 div_v1 10 349.484 0.066 349.382 349.590
real32 div_v2 10 1540.759 0.684 1539.947 1542.176
real32 inv 10 1540.422 0.560 1539.773 1541.705
real32 invsqrt_v1 10 3078.881 0.598 3077.807 3079.669
real32 invsqrt_v2 10 3078.865 0.836 3077.718 3080.299
real32 exp 10 5301.520 1.881 5299.345 5306.194
real32 erf 10 7850.312 5.180 7845.510 7862.435
real32 erfc 10 7775.552 13.802 7745.528 7805.566
real32 erfc_scaled 10 9277.672 11.055 9271.299 9308.566
real32 gamma 10 33707.274 348.965 33578.135 34753.733
real32 sqrt 10 1540.188 0.297 1539.773 1540.566
real32 sin 10 4600.062 5.705 4592.263 4607.740
real32 cos 10 4382.453 1.202 4380.607 4385.198
real32 tan 10 10302.190 7.124 10295.483 10321.488
real32 sinh 10 21727.584 12.065 21711.232 21754.170
real32 cosh 10 15776.607 22.459 15732.808 15810.379
real32 tanh 10 16192.304 3.011 16187.762 16195.516
real32 asinh 10 18565.543 4.408 18559.687 18570.578
real32 acosh 10 7523.262 1.561 7521.186 7525.217
real32 atan 10 8980.231 5.803 8969.358 8988.739
real32 bessel_j0 10 7269.507 4.914 7265.638 7282.819
real32 bessel_j1 10 7355.541 5.941 7350.856 7367.729
real32 bessel_y0 10 20351.192 8.946 20343.557 20366.343
real32 bessel_y1 10 17202.448 10.890 17187.982 17220.968
real32 epsilon 10 108.798 2.555 105.629 114.074
real32 exponent 10 7876.492 5.051 7871.906 7890.285
real32 fraction 10 7267.205 1.780 7265.049 7270.162
real32 log 10 4447.627 4.467 4445.209 4460.821
real32 log10 10 10747.150 23.997 10711.524 10773.471
real32 log_gamma 10 16576.035 45.187 16553.901 16710.801
real32 atan2 10 14991.345 8.971 14979.079 15006.832
real32 dim 10 1169.047 1.528 1165.828 1171.175
real64 add_v1 10 86.026 0.039 85.987 86.105
real64 add_v2 10 171.853 0.040 171.803 171.919
real64 add_v3 10 86.967 0.019 86.945 87.004
real64 mul_v1 10 86.102 0.024 86.074 86.141
real64 mul_v2 10 171.940 0.072 171.875 172.129
real64 mul_v3 10 88.513 1.707 86.863 91.129
real64 fma_v1 10 45.231 0.992 43.742 46.636
real64 fma_v2 10 86.182 0.020 86.155 86.227
real64 fma_v3 10 107.629 0.033 107.588 107.676
real64 fma_v4 10 171.872 0.068 171.806 172.039
real64 div_v1 10 227.486 0.086 227.398 227.675
real64 div_v2 10 1369.327 0.276 1368.967 1369.626
real64 inv 10 1369.406 0.254 1368.966 1369.693
real64 invsqrt_v1 10 2736.899 0.621 2736.029 2737.906
real64 invsqrt_v2 10 2738.063 3.770 2735.992 2749.173
real64 exp 10 2866.071 2.084 2864.525 2871.944
real64 erf 10 4120.034 2.133 4118.124 4125.564
real64 erfc 10 4259.712 3.422 4254.500 4267.001
real64 erfc_scaled 10 5655.621 72.846 5528.256 5746.572
real64 gamma 10 18111.080 8.838 18098.515 18126.347
real64 sqrt 10 1369.196 0.251 1368.852 1369.471
real64 sin 10 5023.851 1.489 5021.318 5026.392
real64 cos 10 4912.890 1.747 4910.838 4915.920
real64 tan 10 6813.356 4.890 6805.394 6823.659
real64 sinh 10 11522.962 8.299 11511.513 11537.155
real64 cosh 10 8789.876 35.889 8732.449 8863.568
real64 tanh 10 8946.094 4.049 8941.581 8952.599
real64 asinh 10 11485.200 5.350 11480.395 11499.156
real64 acosh 10 3798.143 4.178 3794.990 3809.790
real64 atan 10 5378.069 6.031 5366.471 5391.154
real64 bessel_j0 10 3892.101 2.816 3889.856 3899.497
real64 bessel_j1 10 4370.156 1.625 4368.378 4372.731
real64 bessel_y0 10 9159.969 2.243 9157.158 9162.820
real64 bessel_y1 10 10432.820 2.223 10429.994 10435.376
real64 epsilon 10 66.985 4.481 63.397 78.980
real64 exponent 10 3811.847 1.060 3810.257 3813.030
real64 fraction 10 3719.800 0.876 3718.704 3720.986
real64 log 10 2778.286 2.360 2775.963 2782.395
real64 log10 10 6193.537 4.539 6189.627 6205.402
real64 log_gamma 10 9203.438 3.282 9197.866 9209.085
real64 atan2 10 9722.977 4.921 9718.569 9736.797
real64 dim 10 482.713 1.259 480.665 485.122
real128 add_v1 10 2574.932 19.827 2564.167 2631.649
real128 add_v2 10 2411.157 17.946 2392.111 2451.529
real128 add_v3 10 2564.922 7.596 2557.888 2581.158
real128 mul_v1 10 3651.161 3.559 3645.196 3658.450
real128 mul_v2 10 4531.501 2.205 4527.787 4535.691
real128 mul_v3 10 4443.683 1.302 4442.077 4445.589
real128 fma_v1 10 3838.736 15.658 3815.710 3866.255
real128 fma_v2 10 3603.297 9.880 3591.856 3618.360
real128 fma_v3 10 3624.228 11.196 3602.823 3641.256
real128 fma_v4 10 7267.879 4.686 7262.465 7279.833
real128 div_v1 10 3458.293 11.710 3423.482 3464.354
real128 div_v2 10 6646.595 12.207 6612.884 6656.481
real128 inv 10 6462.359 5.599 6453.826 6471.889
real128 invsqrt_v1 10 36675.788 24.132 36628.777 36706.275
real128 invsqrt_v2 10 36740.737 16.538 36713.636 36770.931
real128 exp 10 110978.488 97.337 110804.393 111098.777
real128 erf 10 123311.573 37.678 123217.624 123362.747
real128 erfc 10 124197.159 118.062 123973.810 124375.957
real128 erfc_scaled 10 248899.496 195.291 248603.238 249119.490
real128 gamma 10 310476.342 261.031 310009.976 310731.364
real128 sqrt 10 30402.656 48.159 30298.763 30479.956
real128 sin 10 89069.862 123.499 88931.968 89404.817
real128 cos 10 89402.391 53.583 89301.748 89456.100
real128 tan 10 102111.089 71.990 102000.626 102216.891
real128 sinh 10 181610.615 106.127 181430.514 181832.082
real128 cosh 10 142238.319 207.877 141852.050 142487.265
real128 tanh 10 172655.887 138.043 172392.494 172837.768
real128 asinh 10 275575.148 160.229 275238.551 275759.234
real128 acosh 10 6008.563 27.207 5968.560 6038.922
real128 atan 10 101342.952 110.185 101196.637 101624.423
real128 bessel_j0 10 113255.227 31.532 113195.390 113291.003
real128 bessel_j1 10 110574.767 54.798 110459.083 110635.891
real128 bessel_y0 10 365876.503 226.723 365484.777 366138.886
real128 bessel_y1 10 378793.560 360.634 378081.538 379229.811
real128 epsilon 10 63.826 1.272 60.240 64.839
real128 exponent 10 4388.658 7.271 4378.983 4398.400
real128 fraction 10 3148.992 11.162 3137.014 3175.288
real128 log 10 130470.881 170.113 130081.962 130642.825
real128 log10 10 210440.862 179.811 210095.028 210741.536
real128 log_gamma 10 255885.968 134.696 255604.181 256123.489
real128 atan2 10 109819.332 39.388 109752.673 109872.054
real128 dim 10 4033.737 2.301 4029.695 4037.622
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment