Skip to content

Instantly share code, notes, and snippets.

@kevroletin
Created December 15, 2010 14:01
Show Gist options
  • Save kevroletin/741964 to your computer and use it in GitHub Desktop.
Save kevroletin/741964 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <stdio.h>
#include <math.h>
#include <fstream>
#include <math.h>
using namespace std;
#include "megaprofiler.h"
// Capacity (in floats) of each statically allocated grid buffer below.
const int max_n = 10000000;
// dh and dt are only used to derive C; presumably the spatial and time step
// sizes of the scheme -- TODO confirm.
const double dh = 1;
const double dt = 0.5;
// Coefficient dt / dh^2; main() copies it into every lane of CC.
const double C = dt / (dh * dh);
// Number of update steps performed by each timed loop in main().
const int step_cnt = 10;
// Two grid buffers; main() selects them alternately through s % 2 so that one
// is read while the other is written.
// NOTE(review): because the file has `using namespace std;`, an unqualified
// use of `data` can collide with std::data when built as C++17 or later --
// confirm against the toolchain in use.
float data[2][max_n];
// Initial values produced by Init() and copied into data[1] before each run.
float init_data[max_n];
// Current-step (read) and next-step (written) buffers, reassigned every step.
float* u;
float* nu;
// ov receives the value produced by TEST_FIN in main(); res is not referenced
// anywhere in the visible code.
unsigned long long ov, res;
// Fills arr[0..count] with the initial profile: both end points are set to 0
// and every interior point i (0 < i < count) to 100 * sin(i / 10).
//
// arr     - destination buffer; must have room for count + 1 floats.
// count   - index of the last grid point (the grid has count + 1 points).
// forward - accepted for signature compatibility only; it has no effect.
//
// A null buffer or a negative count describes no grid at all, so the call is
// a no-op instead of writing through arr[count] out of bounds.
void Init(float* arr, int count, bool forward = false)
{
    (void)forward;  // intentionally unused
    if (!arr || count < 0)
        return;
    for (int i = 1; i < count; ++i)
        arr[i] = sin(i / 10.0) * 100;  // evaluated in double, stored as float
    arr[0] = arr[count] = 0;
}
// Writes grid points 0..count of arr to o, one per line, formatted as
// "<index> <value> <t>". With forward == true the indices ascend from 0,
// otherwise they descend from count. Nothing is written when count < 0.
void Print(ostream& o, float* arr, int count, int t, bool forward = true)
{
    const int stride = forward ? 1 : -1;
    int idx = forward ? 0 : count;
    // Exactly count + 1 rows are emitted in either direction.
    for (int remaining = count + 1; remaining > 0; --remaining, idx += stride)
    {
        o << idx << ' ' << arr[idx] << ' ' << t << '\n';
    }
}
// Four-lane float constants for the SSE section of main(): before that
// benchmark runs, main() fills every lane of `two` with 2.0 and every lane of
// `CC` with C.
float two[4];
float CC[4];
int main()
{
TEST_HEAD(1000);
TEST_START;
TEST_FIN(ov);
//----------------------------------------------
ofstream out("output.txt");
int n = 100;
Init(init_data, n);
TEST_HEAD(1000);
for (int i = 0; i < n; ++i)
data[1][i] = init_data[i];
TEST_START;
for (int s = 1; s <= step_cnt; ++s)
{
u = data[s % 2];
nu = data[(s + 1) % 2];
// Print(out, u, n, s, s % 2);
for (int i = 1; i < n; ++i)
{
cout << u[i] << endl;
cout << (u[i-1] + u[i+1] - 2*u[i]) << endl;
nu[i] = 0.99 * (u[i-1] + u[i+1] - 2*u[i]) + u[i];
cout << "u[i] = " << u[i] << endl;
cout << "nu[i] = " << nu[i] << endl;
}
nu[0] = nu[n] = 0;
}
TEST_FIN(ov);
//--- sse ---
two[0] = two[1] = two[2] = two[3] = 2.0;
CC[0] = CC[1] = CC[2] = CC[3] = C;
TEST_HEAD(1000);
for (int i = 0; i < n; ++i)
data[1][i] = init_data[i];
TEST_START;
asm (\
"movups two, %xmm1;" //6 load 2
"movups CC, %xmm2;"//2 load C
);
for (int s = 1; s <= step_cnt; ++s)
{
u = data[s % 2];
nu = data[(s + 1) % 2];
for (int i = 0; i < n; i += 4)
{
asm (\
"movups $-1(u, %0), %%xmm3;" //3 load u + i - 1
"movups (u, %0), %%xmm4;" //4 load u + i
"movups $1(u, %0), %%xmm5;" //5 load u + i + 1
"movups %%xmm1, %%xmm6;" //1 <- 4 * 6
"mulps %%xmm4, %%xmm1;"
"addps %%xmm3, %%xmm1;" //1 <- 1 + 3
"addps %%xmm5, %%xmm1;" //1 <- 1 + 5
"mulps %%xmm2, %%xmm1;" //1 <- 1 * 2
"addps %%xmm4, %%xmm1;" //1 <- 1 + 4
"movups %%xmm6, (nu, %0);" //nu + i <- 6
:
:""(i)
:
);
}
}
TEST_FIN(ov);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment