Last active
August 29, 2015 14:11
-
-
Save logicmachine/1023a2309da038620993 to your computer and use it in GitHub Desktop.
CPAC2014-COUNTARI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <algorithm> | |
#include <cstring> | |
#include <cstdint> | |
using namespace std; | |
static const int MAX_A = 30000; | |
static uint32_t forward_hist[30004]; | |
static uint32_t backward_hist[30004]; | |
int main(){ | |
ios_base::sync_with_stdio(false); | |
// 入力の読み取り・初期ヒストグラムの計算 | |
int n; | |
cin >> n; | |
vector<int> a(n); | |
memset(forward_hist, 0, sizeof(forward_hist)); | |
memset(backward_hist, 0, sizeof(backward_hist)); | |
for(int i = 0; i < n; ++i){ | |
cin >> a[i]; | |
++forward_hist[a[i]]; | |
} | |
// 解の計算 | |
__asm__( | |
// xmm0 = [ 0, 0 ] | |
"pxor %%xmm0, %%xmm0;" | |
: ); | |
for(int i = 0; i < n; ++i){ | |
// ヒストグラムの更新 (1) | |
--forward_hist[a[i]]; | |
// 畳み込む範囲の先頭へのポインタと長さを取得 | |
const int w = min(a[i], MAX_A - a[i]); | |
const uint32_t *forward = forward_hist + a[i] - w; | |
const uint32_t *backward = backward_hist + MAX_A - a[i] - w; | |
const int length = 2 * w + 1; | |
// 4要素ずつ畳みこむ | |
for(int j = 0; j < length; j += 4){ | |
__asm__( | |
// xmm1 = [ forward[j], forward[j + 1], forward[j + 2], forward[j + 3] ] | |
"movdqu (%0), %%xmm1;" | |
// xmm2 = [ backward[j], backward[j + 1], backward[j + 2], backward[j + 3] ] | |
"movdqu (%1), %%xmm2;" | |
// xmm3 = xmm1 | |
"movdqa %%xmm1, %%xmm3;" | |
// xmm1 = [ xmm1[0] * xmm2[0], xmm1[2] * xmm2[2] ] | |
"pmuldq %%xmm2, %%xmm1;" | |
// xmm3 = [ xmm3[1], xmm3[2], xmm3[3], 0 ] | |
"psrldq $0x4, %%xmm3;" | |
// xmm2 = [ xmm2[1], xmm2[2], xmm2[3], 0 ] | |
"psrldq $0x4, %%xmm2;" | |
// xmm3 = [ xmm3[0] * xmm2[0], xmm3[2] * xmm2[2] ] | |
"pmuldq %%xmm2, %%xmm3;" | |
// xmm0 = [ xmm0[0] + xmm1[0], xmm0[1] + xmm1[1] ] | |
"paddq %%xmm1, %%xmm0;" | |
// xmm0 = [ xmm0[0] + xmm3[0], xmm0[1] + xmm3[1] ] | |
"paddq %%xmm3, %%xmm0;" | |
// %0 = forward + j, %1 = backward + j | |
: : "r" (forward + j), "r" (backward + j)); | |
} | |
// ヒストグラムの更新 (2) | |
++backward_hist[MAX_A - a[i]]; | |
} | |
int64_t answer[2]; | |
// answer = xmm0 | |
__asm__( | |
"movdqu %%xmm0, (%0);" | |
: : "r" (answer)); | |
cout << (answer[0] + answer[1]) << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment