Skip to content

Instantly share code, notes, and snippets.

@logicmachine
Last active August 29, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save logicmachine/1023a2309da038620993 to your computer and use it in GitHub Desktop.
Save logicmachine/1023a2309da038620993 to your computer and use it in GitHub Desktop.
CPAC2014-COUNTARI
#include <iostream>
#include <vector>
#include <algorithm>
#include <cstring>
#include <cstdint>
using namespace std;
static const int MAX_A = 30000;
static uint32_t forward_hist[30004];
static uint32_t backward_hist[30004];
int main(){
ios_base::sync_with_stdio(false);
// 入力の読み取り・初期ヒストグラムの計算
int n;
cin >> n;
vector<int> a(n);
memset(forward_hist, 0, sizeof(forward_hist));
memset(backward_hist, 0, sizeof(backward_hist));
for(int i = 0; i < n; ++i){
cin >> a[i];
++forward_hist[a[i]];
}
// 解の計算
__asm__(
// xmm0 = [ 0, 0 ]
"pxor %%xmm0, %%xmm0;"
: );
for(int i = 0; i < n; ++i){
// ヒストグラムの更新 (1)
--forward_hist[a[i]];
// 畳み込む範囲の先頭へのポインタと長さを取得
const int w = min(a[i], MAX_A - a[i]);
const uint32_t *forward = forward_hist + a[i] - w;
const uint32_t *backward = backward_hist + MAX_A - a[i] - w;
const int length = 2 * w + 1;
// 4要素ずつ畳みこむ
for(int j = 0; j < length; j += 4){
__asm__(
// xmm1 = [ forward[j], forward[j + 1], forward[j + 2], forward[j + 3] ]
"movdqu (%0), %%xmm1;"
// xmm2 = [ backward[j], backward[j + 1], backward[j + 2], backward[j + 3] ]
"movdqu (%1), %%xmm2;"
// xmm3 = xmm1
"movdqa %%xmm1, %%xmm3;"
// xmm1 = [ xmm1[0] * xmm2[0], xmm1[2] * xmm2[2] ]
"pmuldq %%xmm2, %%xmm1;"
// xmm3 = [ xmm3[1], xmm3[2], xmm3[3], 0 ]
"psrldq $0x4, %%xmm3;"
// xmm2 = [ xmm2[1], xmm2[2], xmm2[3], 0 ]
"psrldq $0x4, %%xmm2;"
// xmm3 = [ xmm3[0] * xmm2[0], xmm3[2] * xmm2[2] ]
"pmuldq %%xmm2, %%xmm3;"
// xmm0 = [ xmm0[0] + xmm1[0], xmm0[1] + xmm1[1] ]
"paddq %%xmm1, %%xmm0;"
// xmm0 = [ xmm0[0] + xmm3[0], xmm0[1] + xmm3[1] ]
"paddq %%xmm3, %%xmm0;"
// %0 = forward + j, %1 = backward + j
: : "r" (forward + j), "r" (backward + j));
}
// ヒストグラムの更新 (2)
++backward_hist[MAX_A - a[i]];
}
int64_t answer[2];
// answer = xmm0
__asm__(
"movdqu %%xmm0, (%0);"
: : "r" (answer));
cout << (answer[0] + answer[1]) << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment