Skip to content

Instantly share code, notes, and snippets.

@is8ac
Created January 4, 2021 02:50
Show Gist options
  • Save is8ac/df982fa1fa0ae22f43cdf35b1115d9cb to your computer and use it in GitHub Desktop.
Save is8ac/df982fa1fa0ae22f43cdf35b1115d9cb to your computer and use it in GitHub Desktop.
Benchmarks of 2 implementations of the same algorithm (versions 3 and 5) at 6 input sizes under 6 rustc optimization configurations, plus a single-threaded run with the default configuration

default release opt

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 16 32 32 9216 32.938 1030182.004 1030182.004 3577.021
3 2 16 64 32 18432 64.750 1011854.566 2023709.133 3513.384
3 2 16 128 32 36864 129.938 1015413.960 4061655.842 3525.743
3 2 16 256 32 73728 277.000 1082303.786 8658430.289 3757.999
3 2 16 512 32 147456 568.000 1109743.197 17755891.156 3853.275
3 2 16 1024 32 294912 1125.500 1099288.490 35177231.688 3816.974
5 2 16 32 32 9216 10.312 323690.141 323690.141 1123.924
5 2 16 64 32 18432 15.875 248133.127 496266.254 861.573
5 2 16 128 32 36864 29.500 230856.398 923425.594 801.585
5 2 16 256 32 73728 57.750 225611.752 1804894.020 783.374
5 2 16 512 32 147456 149.250 291540.294 4664644.703 1012.293
5 2 16 1024 32 294912 492.500 481434.008 15405888.266 1671.646

codegen-units = 1

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 16 32 32 9216 31.625 988877.023 988877.023 3433.601
3 2 16 64 32 18432 59.000 921982.258 1843964.516 3201.327
3 2 16 128 32 36864 117.250 916140.265 3664561.061 3181.043
3 2 16 256 32 73728 260.750 1019021.881 8152175.047 3538.270
3 2 16 512 32 147456 571.250 1115857.159 17853714.539 3874.504
3 2 16 1024 32 294912 1033.500 1009677.716 32309686.922 3505.825
5 2 16 32 32 9216 10.062 315686.988 315686.988 1096.135
5 2 16 64 32 18432 19.625 307469.552 614939.104 1067.603
5 2 16 128 32 36864 50.000 391013.694 1564054.775 1357.686
5 2 16 256 32 73728 231.000 902370.037 7218960.293 3133.229
5 2 16 512 32 147456 566.500 1106640.410 17706246.555 3842.501
5 2 16 1024 32 294912 4454.000 4349625.233 139188007.453 15102.865

lto = "thin"

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 16 32 32 9216 9.500 297792.469 297792.469 1034.002
3 2 16 64 32 18432 16.812 262990.287 525980.574 913.161
3 2 16 128 32 36864 33.812 264188.154 1056752.617 917.320
3 2 16 256 32 73728 79.250 309573.160 2476585.281 1074.907
3 2 16 512 32 147456 163.000 318630.125 5098082.000 1106.355
3 2 16 1024 32 294912 333.000 325208.355 10406667.359 1129.196
5 2 16 32 32 9216 7.938 249043.172 249043.172 864.733
5 2 16 64 32 18432 12.250 192092.760 384185.520 666.989
5 2 16 128 32 36864 20.812 163036.229 652144.916 566.098
5 2 16 256 32 73728 38.500 150683.047 1205464.375 523.205
5 2 16 512 32 147456 111.000 216949.456 3471191.297 753.297
5 2 16 1024 32 294912 449.500 439156.565 14053010.094 1524.849

codegen-units = 1, lto = "thin"

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 16 32 32 9216 8.000 251794.223 251794.223 874.285
3 2 16 64 32 18432 14.250 222984.823 445969.646 774.253
3 2 16 128 32 36864 29.062 227464.340 909857.361 789.807
3 2 16 256 32 73728 61.625 241207.314 1929658.512 837.525
3 2 16 512 32 147456 142.000 277431.813 4438909.008 963.305
3 2 16 1024 32 294912 269.000 262829.659 8410549.078 912.603
5 2 16 32 32 9216 6.875 215699.184 215699.184 748.955
5 2 16 64 32 18432 15.500 242780.401 485560.803 842.988
5 2 16 128 32 36864 40.938 320035.783 1280143.131 1111.235
5 2 16 256 32 73728 208.625 815322.578 6522580.621 2830.981
5 2 16 512 32 147456 517.000 1010128.052 16162048.828 3507.389
5 2 16 1024 32 294912 4033.000 3938599.417 126035181.328 13675.692

rustflags = ["-C", "target-cpu=native"]

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 16 32 32 9216 32.688 1023065.971 1023065.971 3552.312
3 2 16 64 32 18432 61.938 968498.838 1936997.676 3362.843
3 2 16 128 32 36864 129.312 1010397.063 4041588.252 3508.323
3 2 16 256 32 73728 261.500 1021857.097 8174856.777 3548.115
3 2 16 512 32 147456 527.750 1031084.930 16497358.875 3580.156
3 2 16 1024 32 294912 1043.500 1019264.069 32616450.219 3539.111
5 2 16 32 32 9216 10.875 341049.057 341049.057 1184.198
5 2 16 64 32 18432 16.812 263036.051 526072.102 913.320
5 2 16 128 32 36864 28.562 223549.749 894198.996 776.214
5 2 16 256 32 73728 57.250 224051.503 1792412.027 777.957
5 2 16 512 32 147456 148.750 290771.451 4652343.219 1009.623
5 2 16 1024 32 294912 466.500 455779.345 14584939.031 1582.567

rustflags = ["-C", "target-cpu=native"], codegen-units = 1, lto = "thin"

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 16 32 32 9216 8.688 272522.389 272522.389 946.258
3 2 16 64 32 18432 14.000 219625.833 439251.666 762.590
3 2 16 128 32 36864 28.000 219228.379 876913.518 761.210
3 2 16 256 32 73728 61.750 241528.711 1932229.691 838.641
3 2 16 512 32 147456 128.250 250879.322 4014069.156 871.109
3 2 16 1024 32 294912 306.500 299331.826 9578618.422 1039.347
5 2 16 32 32 9216 7.625 239311.895 239311.895 830.944
5 2 16 64 32 18432 16.625 260267.513 520535.025 903.707
5 2 16 128 32 36864 43.625 341027.564 1364110.256 1184.123
5 2 16 256 32 73728 211.625 827053.178 6616425.422 2871.712
5 2 16 512 32 147456 478.750 935470.216 14967523.461 3248.160
5 2 16 1024 32 294912 3804.500 3715350.809 118891225.891 12900.524

default release opt, single-threaded (threads = 1)

version segs threads input pixel size output pixel size n params ms per example ns per pixel bit ns per channel ns per parameter
3 2 1 32 32 9216 28.441 888814.643 888814.643 3086.162
3 2 1 64 32 18432 54.285 848228.163 1696456.327 2945.237
3 2 1 128 32 36864 107.711 841520.823 3366083.293 2921.947
3 2 1 256 32 73728 215.102 840262.276 6722098.208 2917.577
3 2 1 512 32 147456 432.703 845142.628 13522282.044 2934.523
3 2 1 1024 32 294912 857.906 837800.704 26809622.517 2909.030
5 2 1 32 32 9216 8.453 264163.800 264163.800 917.235
5 2 1 64 32 18432 14.371 224579.332 449158.665 779.789
5 2 1 128 32 36864 25.227 197086.169 788344.677 684.327
5 2 1 256 32 73728 46.281 180809.993 1446479.948 627.812
5 2 1 512 32 147456 95.156 185866.345 2973861.518 645.369
5 2 1 1024 32 294912 185.500 181181.513 5797808.405 629.102
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment