Skip to content

Instantly share code, notes, and snippets.

@kishida
Last active August 29, 2015 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kishida/687b25f3f2804f9ac3ac to your computer and use it in GitHub Desktop.
Save kishida/687b25f3f2804f9ac3ac to your computer and use it in GitHub Desktop.
aparapiベンチ
package kishida.aparapisample;
import com.amd.aparapi.Kernel;
import java.util.Random;
import java.util.stream.IntStream;
/**
 * Micro-benchmark that computes the same 1-D sliding-window weighted sum in
 * three ways and prints the elapsed time of each: as an Aparapi kernel
 * ({@link #gpu()}), with a parallel {@code IntStream} ({@link #para()}) and
 * with a plain sequential loop ({@link #seq()}).
 *
 * <p>For every window start index {@code id} the result is
 * {@code out[id] = sum(filter[i] * in[id + i])} over all filter taps.
 *
 * @author kishida
 */
public class AparapiBench extends Kernel {

    /**
     * Number of filter taps. Declared as a compile-time constant so that it is
     * inlined into {@link #run()}; the original author noted that
     * {@code filter.length} cannot be used inside the kernel.
     */
    static final int FILTER_SIZE = 11 * 11;

    /** Input samples, filled with pseudo-random values in {@link #main}. */
    static float[] in = new float[65536];

    /** Filter weights, filled with pseudo-random values in {@link #main}. */
    static float[] filter = new float[FILTER_SIZE];

    /** Output buffer shared by all three implementations. */
    static float[] out = new float[in.length];

    /** Kernel instance used by {@link #gpu()}; created in {@link #main}. */
    static Kernel k;

    /**
     * Fills the input and filter with reproducible random data, then benchmarks
     * the three implementations and prints the kernel's execution mode.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Fixed seed so every run works on the same data.
        Random r = new Random(1234);
        for (int i = 0; i < in.length; ++i) {
            in[i] = r.nextFloat();
        }
        for (int i = 0; i < filter.length; ++i) {
            filter[i] = r.nextFloat();
        }

        // Must be a named class: per the original author's note, an anonymous
        // subclass (new Kernel(){} or new AparapiBench(){}) does not work here.
        k = new AparapiBench();
        // Explicit buffer management: data is transferred only by the
        // put()/get() calls in gpu().
        k.setExplicit(true);

        bench("gpu", AparapiBench::gpu);
        bench("para", AparapiBench::para);
        bench("seq", AparapiBench::seq);
        System.out.println(k.getExecutionMode());
    }

    /**
     * Number of window positions for which the whole filter fits inside the
     * input, i.e. the number of output elements every implementation computes.
     */
    private static int outputCount() {
        return in.length - filter.length + 1;
    }

    /** Runs the convolution through the Aparapi kernel {@link #k}. */
    public static void gpu() {
        k.put(in);
        k.put(filter);
        k.execute(outputCount());
        k.get(out);
    }

    /** Runs the convolution with a single-threaded loop. */
    public static void seq() {
        int count = outputCount();
        for (int id = 0; id < count; ++id) {
            float result = 0;
            for (int i = 0; i < filter.length; ++i) {
                result += filter[i] * in[id + i];
            }
            out[id] = result;
        }
    }

    /**
     * Runs the convolution with a parallel {@code IntStream}. Each index writes
     * only its own {@code out[id]}, so the iterations are independent.
     */
    public static void para() {
        // The upper bound is exclusive; it must match seq() and gpu() so that
        // the last window is computed as well.
        IntStream.range(0, outputCount()).parallel().forEach(id -> {
            float result = 0;
            for (int i = 0; i < filter.length; ++i) {
                result += filter[i] * in[id + i];
            }
            out[id] = result;
        });
    }

    /**
     * Kernel body: computes one output element, selected by the global id.
     */
    @Override
    public void run() {
        int id = getGlobalId();
        float result = 0;
        // FILTER_SIZE instead of filter.length: filter.length cannot be used here.
        for (int i = 0; i < FILTER_SIZE; ++i) {
            result += filter[i] * in[id + i];
        }
        out[id] = result;
    }

    /**
     * Runs {@code proc} 10 times as warm-up, then times 1000 further runs and
     * prints the total as {@code name:seconds}.
     *
     * @param name label printed in front of the measured time
     * @param proc the code to benchmark
     */
    static void bench(String name, Runnable proc) {
        for (int i = 0; i < 10; ++i) {
            proc.run();
        }
        // nanoTime is monotonic and meant for measuring elapsed time.
        long start = System.nanoTime();
        for (int i = 0; i < 1000; ++i) {
            proc.run();
        }
        System.out.printf("%s:%.3fs%n", name, (System.nanoTime() - start) / 1e9);
    }
}
MacBook Pro
i7 4コア 2.8GHz
Intel Iris pro 1024MB
gpu:8.025s
para:1.287s
seq:6.648s
GPU
gpu:2.836s
para:1.508s
seq:5.727s
JTP
i7 2600K 3.4GHz
NVIDIA GT730
gpu:0.790s
para:1.293s
seq:7.326s
GPU
gpu:1.555s
para:1.185s
seq:7.354s
JTP
NVIDIA GTX970
gpu:0.486s
para:1.148s
seq:7.340s
GPU
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment