Last active
May 2, 2016 12:34
-
-
Save YSRKEN/270c37189877dff882f87c672483f7fd to your computer and use it in GitHub Desktop.
「C# が C++ の速度を凌駕している」らしいので、C++側を高速化してみた ref: http://qiita.com/YSRKEN/items/8b53b1a783191503560c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Compile: cl speedtest2.cpp /O2 /GL /EHsc | |
#include<chrono> | |
#include<cstdint> | |
#include<iostream> | |
using Byte = uint8_t; | |
void test(){ | |
//! 画面サイズを設定する | |
size_t w = 4321, h = 6789; | |
//! ストライドの大きさを計算する | |
//! (横幅を4で割り切れる画素数にする) | |
size_t stride = ( w + 3 ) & ~3; | |
//! 領域を確保する(各画素は1バイト) | |
auto *a = new Byte[stride * h]; | |
//! 二次元直交座標のX・YのXORを画素に代入する | |
//! (擬似的な画像処理のテスト) | |
for(size_t y = 0; y < h; y++){ | |
auto p = a + y * stride; | |
for(size_t x = 0; x < w; x++){ | |
p[x] = static_cast<Byte>(x ^ y); | |
} | |
} | |
delete[] a; | |
} | |
int main(){ | |
//! 計測開始 | |
auto start = std::chrono::system_clock::now(); | |
//! 計測用コード | |
for(size_t i = 0; i < 100; ++i){ | |
test(); | |
} | |
//! 計測終了・集計 | |
auto end = std::chrono::system_clock::now(); | |
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
std::cout << msec << "[ms]" << std::endl; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! Compile: /GS /GL /analyze- /W3 /Gy /Zc:wchar_t /Zi /Gm- /O2 /sdl
//! /Fd"Release\vc140.pdb" /Zc:inline /fp:precise /D "WIN32"
//! /D "NDEBUG" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE"
//! /errorReport:prompt /WX- /Zc:forScope /arch:AVX /Gd
//! /Oy- /Oi /MD /Fa"Release\" /EHsc /nologo /Fo"Release\"
//! /Fp"Release\sample.pch"
#include<chrono> | |
#include<cstdint> | |
#include<iostream> | |
#include<immintrin.h> | |
using Byte = uint8_t; | |
void test(){ | |
//! 画面サイズを設定する | |
const size_t w = 4321, h = 6789; | |
//! ストライドの大きさを計算する | |
//! (横幅を4で割り切れる画素数にする) | |
const size_t stride = ( w + 3 ) & ~3; | |
//! 領域を確保する(各画素は1バイト) | |
//! 本来ならアラインメントを揃えたいところだが…… | |
auto *a = new Byte[stride * h]; | |
//! 二次元直交座標のX・YのXORを画素に代入する | |
//! (擬似的な画像処理のテスト) | |
for(size_t y = 0; y < h; y++){ | |
auto p = a + y * stride; | |
const auto simd_y = _mm_set1_epi8(y); | |
for(size_t x = 0; x < (w >> 4) << 4; x += 16){ | |
const auto simd_x = _mm_set_epi8( | |
x+15,x+14,x+13,x+12,x+11,x+10,x+ 9,x+ 8, | |
x+ 7,x+ 6,x+ 5,x+ 4,x+ 3,x+ 2,x+ 1,x+ 0); | |
const auto simd_xor = _mm_xor_si128(simd_x, simd_y); | |
_mm_storeu_si128((__m128i*)(p + x), simd_xor); | |
} | |
for(size_t x = (w >> 4) << 4; x < w; ++x){ | |
p[x] = static_cast<Byte>(x ^ y); | |
} | |
} | |
delete[] a; | |
} | |
int main(){ | |
//! 計測開始 | |
auto start = std::chrono::system_clock::now(); | |
//! 計測用コード | |
for(size_t i = 0; i < 100; ++i){ | |
test(); | |
} | |
//! 計測終了・集計 | |
auto end = std::chrono::system_clock::now(); | |
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); | |
std::cout << msec << "[ms]" << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment