public
Last active

  • Download Gist
Makefile
Makefile
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
# Every benchmark binary: one per (source variant, compiler, optimisation level).
ALL = \
    rnn_ca1d-gcc-O2 rnn_ca1d-gcc-O3 rnn_ca1d-icc-O2 rnn_ca1d-icc-O3 \
    rnn_ca2d-gcc-O2 rnn_ca2d-gcc-O3 rnn_ca2d-icc-O2 rnn_ca2d-icc-O3 \
    rnn_opt0-gcc-O2 rnn_opt0-gcc-O3 rnn_opt0-icc-O2 rnn_opt0-icc-O3 \
    rnn_opt1-gcc-O2 rnn_opt1-gcc-O3 rnn_opt1-icc-O2 rnn_opt1-icc-O3 \
    rnn_opt2-gcc-O2 rnn_opt2-gcc-O3 rnn_opt2-icc-O2 rnn_opt2-icc-O3 \
    rnn_opt3-gcc-O2 rnn_opt3-gcc-O3 rnn_opt3-icc-O2 rnn_opt3-icc-O3 \
    rnn_opt4-gcc-O2 rnn_opt4-gcc-O3 rnn_opt4-icc-O2 rnn_opt4-icc-O3

# gcc-only subset (ca1d and ca2d); uncomment all three lines to override the list above.
#ALL = \
#    rnn_ca1d-gcc-O2 rnn_ca1d-gcc-O3 \
#    rnn_ca2d-gcc-O2 rnn_ca2d-gcc-O3

.PHONY: clean all runtest

all: ${ALL}

# -f: do not fail when some of the binaries have not been built.
clean:
	rm -f ${ALL}
 
# rnn_ca1d.c built with gcc and icc at -O2 and -O3.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_ca1d-gcc-O2: rnn_ca1d.c
	gcc -O2 $< -o $@ -lm
rnn_ca1d-gcc-O3: rnn_ca1d.c
	gcc -O3 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_ca1d-icc-O2: rnn_ca1d.c
	icc -vec-report1 -O2 $< -o $@
rnn_ca1d-icc-O3: rnn_ca1d.c
	icc -vec-report1 -O3 $< -o $@
 
# rnn_ca2d.c built with gcc and icc at -O2 and -O3.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_ca2d-gcc-O2: rnn_ca2d.c
	gcc -O2 $< -o $@ -lm
rnn_ca2d-gcc-O3: rnn_ca2d.c
	gcc -O3 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_ca2d-icc-O2: rnn_ca2d.c
	icc -vec-report1 -O2 $< -o $@
rnn_ca2d-icc-O3: rnn_ca2d.c
	icc -vec-report1 -O3 $< -o $@
 
# rnn_opt0.c built with gcc and icc at -O2 and -O3.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_opt0-gcc-O2: rnn_opt0.c
	gcc -O2 $< -o $@ -lm
rnn_opt0-gcc-O3: rnn_opt0.c
	gcc -O3 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_opt0-icc-O2: rnn_opt0.c
	icc -vec-report1 -O2 $< -o $@
rnn_opt0-icc-O3: rnn_opt0.c
	icc -vec-report1 -O3 $< -o $@
 
# rnn_opt1.c built with gcc and icc at -O2 and -O3.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_opt1-gcc-O2: rnn_opt1.c
	gcc -O2 $< -o $@ -lm
rnn_opt1-gcc-O3: rnn_opt1.c
	gcc -O3 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_opt1-icc-O2: rnn_opt1.c
	icc -vec-report1 -O2 $< -o $@
rnn_opt1-icc-O3: rnn_opt1.c
	icc -vec-report1 -O3 $< -o $@
 
# rnn_opt2.c built with gcc and icc at -O2 and -O3.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_opt2-gcc-O2: rnn_opt2.c
	gcc -O2 $< -o $@ -lm
rnn_opt2-gcc-O3: rnn_opt2.c
	gcc -O3 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_opt2-icc-O2: rnn_opt2.c
	icc -vec-report1 -O2 $< -o $@
rnn_opt2-icc-O3: rnn_opt2.c
	icc -vec-report1 -O3 $< -o $@
 
# rnn_opt3.c built with gcc and icc at -O2 and -O3.
# The source uses `restrict`, hence -std=c99 for gcc and -restrict for icc.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_opt3-gcc-O2: rnn_opt3.c
	gcc -O2 -std=c99 $< -o $@ -lm
rnn_opt3-gcc-O3: rnn_opt3.c
	gcc -O3 -std=c99 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_opt3-icc-O2: rnn_opt3.c
	icc -vec-report1 -O2 -restrict $< -o $@
rnn_opt3-icc-O3: rnn_opt3.c
	icc -vec-report1 -O3 -restrict $< -o $@
 
# rnn_opt4.c built with gcc and icc at -O2 and -O3.
# The source uses `restrict`, hence -std=c99 for gcc and -restrict for icc.
# -lm is placed after the source file: the linker resolves libraries in
# command-line order, so a library named before its user may be dropped.
rnn_opt4-gcc-O2: rnn_opt4.c
	gcc -O2 -std=c99 $< -o $@ -lm
rnn_opt4-gcc-O3: rnn_opt4.c
	gcc -O3 -std=c99 $< -o $@ -ftree-vectorizer-verbose=1 -lm
rnn_opt4-icc-O2: rnn_opt4.c
	icc -vec-report1 -O2 -restrict $< -o $@
rnn_opt4-icc-O3: rnn_opt4.c
	icc -vec-report1 -O3 -restrict $< -o $@
 
# Command-line arguments handed to every benchmark binary, in this order.
num_c = 30
num_s = 1000
repeat_times = 300
#repeat_times = 1

# Build everything, then for each binary write its command line to standard
# error (no trailing newline) and run it under `time`, so the timing report
# follows the command on the same line.
runtest: $(ALL)
	for i in $^; \
	do \
		printf "%s %d %d %d " \
			./$$i $(num_c) $(num_s) $(repeat_times) >&2; \
		time ./$$i $(num_c) $(num_s) $(repeat_times); \
	done
Result
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
./rnn_ca1d-gcc-O2 30 1000 300 1.64user 0.00system 0:01.67elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_ca1d-gcc-O3 30 1000 300 1.57user 0.00system 0:01.57elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_ca1d-icc-O2 30 1000 300 1.04user 0.00system 0:01.06elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+284minor)pagefaults 0swaps
./rnn_ca1d-icc-O3 30 1000 300 1.12user 0.00system 0:01.13elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+286minor)pagefaults 0swaps
./rnn_ca2d-gcc-O2 30 1000 300 1.50user 0.00system 0:01.51elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+275minor)pagefaults 0swaps
./rnn_ca2d-gcc-O3 30 1000 300 2.69user 0.00system 0:02.71elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+275minor)pagefaults 0swaps
./rnn_ca2d-icc-O2 30 1000 300 1.11user 0.00system 0:01.12elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+288minor)pagefaults 0swaps
./rnn_ca2d-icc-O3 30 1000 300 2.21user 0.00system 0:02.25elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+289minor)pagefaults 0swaps
./rnn_opt0-gcc-O2 30 1000 300 1.65user 0.00system 0:01.68elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_opt0-gcc-O3 30 1000 300 1.65user 0.00system 0:01.67elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+272minor)pagefaults 0swaps
./rnn_opt0-icc-O2 30 1000 300 0.84user 0.00system 0:00.84elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+285minor)pagefaults 0swaps
./rnn_opt0-icc-O3 30 1000 300 0.95user 0.00system 0:00.98elapsed 96%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+285minor)pagefaults 0swaps
./rnn_opt1-gcc-O2 30 1000 300 1.64user 0.00system 0:01.68elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+270minor)pagefaults 0swaps
./rnn_opt1-gcc-O3 30 1000 300 1.32user 0.00system 0:01.33elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_opt1-icc-O2 30 1000 300 0.99user 0.00system 0:01.02elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+284minor)pagefaults 0swaps
./rnn_opt1-icc-O3 30 1000 300 0.76user 0.00system 0:00.77elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+283minor)pagefaults 0swaps
./rnn_opt2-gcc-O2 30 1000 300 1.50user 0.00system 0:01.54elapsed 96%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_opt2-gcc-O3 30 1000 300 1.60user 0.00system 0:01.63elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+272minor)pagefaults 0swaps
./rnn_opt2-icc-O2 30 1000 300 0.92user 0.00system 0:00.92elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+285minor)pagefaults 0swaps
./rnn_opt2-icc-O3 30 1000 300 0.93user 0.00system 0:00.97elapsed 96%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+286minor)pagefaults 0swaps
./rnn_opt3-gcc-O2 30 1000 300 1.86user 0.00system 0:01.88elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_opt3-gcc-O3 30 1000 300 1.88user 0.00system 0:01.90elapsed 98%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+271minor)pagefaults 0swaps
./rnn_opt3-icc-O2 30 1000 300 1.22user 0.00system 0:01.23elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+285minor)pagefaults 0swaps
./rnn_opt3-icc-O3 30 1000 300 1.08user 0.00system 0:01.12elapsed 96%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+285minor)pagefaults 0swaps
./rnn_opt4-gcc-O2 30 1000 300 1.52user 0.00system 0:01.53elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+275minor)pagefaults 0swaps
./rnn_opt4-gcc-O3 30 1000 300 1.41user 0.00system 0:01.42elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+274minor)pagefaults 0swaps
./rnn_opt4-icc-O2 30 1000 300 0.61user 0.00system 0:00.61elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+289minor)pagefaults 0swaps
./rnn_opt4-icc-O3 30 1000 300 0.59user 0.00system 0:00.59elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+288minor)pagefaults 0swaps
---
./rnn_ca1d-gcc-O2 1.64user
./rnn_ca1d-gcc-O3 1.57user
./rnn_ca1d-icc-O2 1.04user
./rnn_ca1d-icc-O3 1.12user
./rnn_ca2d-gcc-O2 1.50user
./rnn_ca2d-gcc-O3 2.69user
./rnn_ca2d-icc-O2 1.11user
./rnn_ca2d-icc-O3 2.21user
./rnn_opt0-gcc-O2 1.65user
./rnn_opt0-gcc-O3 1.65user
./rnn_opt0-icc-O2 0.84user
./rnn_opt0-icc-O3 0.95user
./rnn_opt1-gcc-O2 1.64user
./rnn_opt1-gcc-O3 1.32user
./rnn_opt1-icc-O2 0.99user
./rnn_opt1-icc-O3 0.76user
./rnn_opt2-gcc-O2 1.50user
./rnn_opt2-gcc-O3 1.60user
./rnn_opt2-icc-O2 0.92user
./rnn_opt2-icc-O3 0.93user
./rnn_opt3-gcc-O2 1.86user
./rnn_opt3-gcc-O3 1.88user
./rnn_opt3-icc-O2 1.22user
./rnn_opt3-icc-O3 1.08user
./rnn_opt4-gcc-O2 1.52user
./rnn_opt4-gcc-O3 1.41user
./rnn_opt4-icc-O2 0.61user
./rnn_opt4-icc-O3 0.59user
rnn_ca1d.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state. The struct only stores pointers; the arrays themselves are
 * supplied by whoever fills it in. Matrices are flat and row-major with a row
 * width of num_c (see the accessor macros below).
 */
typedef struct __RNN__{
    int num_c;      /* row width of wcc, uc and xc; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double *wcc;    /* num_c * num_c weights */
    double *bc;     /* one value per column index */
    double *ec;     /* one value per column index */
    double *uc;     /* num_s * num_c values */
    double *xc;     /* num_s * num_c values, tanh of the matching uc entry */
} RNN;

/* Element accessors. They expand against a pointer named `self` that must be
   in scope at the point of use. */
#define Wcc(i,j) self->wcc[ self->num_c*(i) + (j) ]
#define Bc(i) self->bc[(i)]
#define Ec(i) self->ec[(i)]
#define Uc(i,j) self->uc[ self->num_c*(i) + (j) ]
#define Xc(i,j) self->xc[ self->num_c*(i) + (j) ]
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c] + ec[c] * bc[c]
 *              + sum over c1 of ec[c] * wcc[c][c1] * xc[s-1][c1]
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Row 0 of uc is only read; all other rows of uc and all rows of xc are
 * overwritten. This is the baseline form: every term of the sum is added
 * straight into uc[s][c] through the accessor macros.
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0, c1;

s0 = 0;
/* row 0: only the tanh, uc[0] is taken as given */
for (c0 = 0; c0 < self->num_c; ++c0){
Xc(s0,c0) = tanh(Uc(s0,c0));
}
for (s0 = 1; s0 < self->num_s; ++s0){
for (c0 = 0; c0 < self->num_c; ++c0){
/* decayed previous value plus the bias term */
Uc(s0,c0) = (1 - Ec(c0)) * Uc(s0-1,c0) + Ec(c0) * Bc(c0);
/* add the weighted previous-row outputs one term at a time */
for (c1 = 0; c1 < self->num_c; ++c1){
Uc(s0,c0) += Ec(c0) * Wcc(c0,c1) * Xc(s0-1,c1);
}
Xc(s0,c0) = tanh(Uc(s0,c0));
}
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int row, col;

    row = 0;
    while (row < self->num_c) {
        col = 0;
        while (col < self->num_c) {
            Wcc(row,col) = initial;
            ++col;
        }
        Ec(row) = initial;
        Bc(row) = initial;
        Uc(0,row) = 0.0;
        Xc(0,row) = 0.0;
        ++row;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network arrays as local variable-length arrays, initialises
 * them once with RNN_init() and then calls RNN_fptt() repeat_times times.
 * Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int nc, ns, i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. */
        RNN rnn;
        double wcc[nc*nc], bc[nc], ec[nc], uc[ns*nc], xc[ns*nc];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = wcc;
        rnn.uc = uc;
        rnn.xc = xc;

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}
rnn_ca2d.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state, two-level variant: wcc, uc and xc are arrays of row
 * pointers, so an element is reached as table[row][column]. The struct only
 * stores pointers; the storage is supplied by whoever fills it in.
 */
typedef struct __RNN__{
    int num_c;      /* number of columns per row; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double **wcc;   /* num_c row pointers */
    double *bc;     /* one value per column index */
    double *ec;     /* one value per column index */
    double **uc;    /* num_s row pointers */
    double **xc;    /* num_s row pointers, tanh of the matching uc entry */
} RNN;

/* Element accessors. They expand against a pointer named `self` that must be
   in scope at the point of use. */
#define Wcc(i,j) self->wcc[i][j]
#define Bc(i) self->bc[i]
#define Ec(i) self->ec[i]
#define Uc(i,j) self->uc[i][j]
#define Xc(i,j) self->xc[i][j]
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c] + ec[c] * bc[c]
 *              + sum over c1 of ec[c] * wcc[c][c1] * xc[s-1][c1]
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Row 0 of uc is only read; all other rows of uc and all rows of xc are
 * overwritten. Same statements as the flat-array baseline, but the accessor
 * macros here go through per-row pointers (table[row][column]).
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0, c1;

s0 = 0;
/* row 0: only the tanh, uc[0] is taken as given */
for (c0 = 0; c0 < self->num_c; ++c0){
Xc(s0,c0) = tanh(Uc(s0,c0));
}
for (s0 = 1; s0 < self->num_s; ++s0){
for (c0 = 0; c0 < self->num_c; ++c0){
/* decayed previous value plus the bias term */
Uc(s0,c0) = (1 - Ec(c0)) * Uc(s0-1,c0) + Ec(c0) * Bc(c0);
/* add the weighted previous-row outputs one term at a time */
for (c1 = 0; c1 < self->num_c; ++c1){
Uc(s0,c0) += Ec(c0) * Wcc(c0,c1) * Xc(s0-1,c1);
}
Xc(s0,c0) = tanh(Uc(s0,c0));
}
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int row, col;

    row = 0;
    while (row < self->num_c) {
        col = 0;
        while (col < self->num_c) {
            Wcc(row,col) = initial;
            ++col;
        }
        Ec(row) = initial;
        Bc(row) = initial;
        Uc(0,row) = 0.0;
        Xc(0,row) = 0.0;
        ++row;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network as local variable-length 2-D arrays plus one table of
 * row pointers per matrix, initialises it once with RNN_init() and then calls
 * RNN_fptt() repeat_times times. Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int c, nc;
    int s, ns;
    int i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. */
        RNN rnn;
        double wcc[nc][nc], bc[nc], ec[nc], uc[ns][nc], xc[ns][nc];
        double *pwcc[nc], *puc[ns], *pxc[ns];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = pwcc;
        rnn.uc = puc;
        rnn.xc = pxc;
        /* point every table entry at the matching row of the 2-D array */
        for (c = 0; c < nc; ++c) rnn.wcc[c] = wcc[c];
        for (s = 0; s < ns; ++s) rnn.uc[s] = uc[s];
        for (s = 0; s < ns; ++s) rnn.xc[s] = xc[s];

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}
rnn_opt0.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state for the row-pointer variant. wcc, bc, ec, uc and xc point at
 * the start of the flat arrays; the *_c and *_p members are working cursors
 * that RNN_init() and RNN_fptt() overwrite while they walk those arrays.
 */
typedef struct __RNN__{
    int num_c;      /* row width of wcc, uc and xc; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double *bc;     /* one value per column index */
    double *ec;     /* one value per column index */
    double *wcc;    /* start of the weight array */
    double *wcc_c;  /* cursor: row of wcc currently in use */
    double *uc;     /* start of the uc array */
    double *uc_c;   /* cursor: row of uc being written */
    double *uc_p;   /* cursor: previous row of uc */
    double *xc;     /* start of the xc array */
    double *xc_c;   /* cursor: row of xc being written */
    double *xc_p;   /* cursor: previous row of xc */
} RNN;
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c]
 *              + ec[c] * (bc[c] + sum over c1 of wcc[c][c1] * xc[s-1][c1])
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Variant that avoids index multiplication: uc_c/xc_c point at the row being
 * written, uc_p/xc_p at the previous row and wcc_c at the current row of wcc;
 * each is advanced by num_c to move to the next row. The cursor members of
 * *self are overwritten as a side effect.
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0, c1;
/* accumulator: bias plus the weighted sum for one output element */
double wccxc_bc;

/* row 0: only the tanh, uc[0] is taken as given */
self->uc_c = self->uc;
self->xc_c = self->xc;
for (c0 = 0; c0 < self->num_c; ++c0){
self->xc_c[c0] = tanh(self->uc_c[c0]);
}
/* row 0 becomes "previous", the current-row cursors move on to row 1 */
self->uc_p = self->uc_c;
self->xc_p = self->xc_c;
self->uc_c += self->num_c;
self->xc_c += self->num_c;
for (s0 = 1; s0 < self->num_s; ++s0){
/* restart at the first row of wcc for every step */
self->wcc_c = self->wcc;
for (c0 = 0; c0 < self->num_c; ++c0){
self->uc_c[c0] = (1 - self->ec[c0]) * self->uc_p[c0];
wccxc_bc = self->bc[c0];
for (c1 = 0; c1 < self->num_c; ++c1){
wccxc_bc += self->wcc_c[c1] * self->xc_p[c1];
}
/* ec[c0] is applied once to the whole accumulated term */
self->uc_c[c0] += self->ec[c0] * wccxc_bc;
self->xc_c[c0] = tanh(self->uc_c[c0]);
/* next row of wcc */
self->wcc_c += self->num_c;
}
/* the row just written becomes "previous"; advance to the next row */
self->uc_p = self->uc_c;
self->xc_p = self->xc_c;
self->uc_c += self->num_c;
self->xc_c += self->num_c;
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 *
 * Side effect on the cursor members: uc_c and xc_c are left at the start of
 * uc and xc, and wcc_c is advanced by num_c once per initialised row.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int unit, k;

    self->uc_c = self->uc;
    self->xc_c = self->xc;
    self->wcc_c = self->wcc;

    unit = 0;
    while (unit < self->num_c) {
        /* fill the weight row the cursor currently points at */
        k = 0;
        while (k < self->num_c) {
            self->wcc_c[k] = initial;
            ++k;
        }
        self->ec[unit] = initial;
        self->bc[unit] = initial;
        self->uc_c[unit] = 0.0;
        self->xc_c[unit] = 0.0;
        self->wcc_c += self->num_c;
        ++unit;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network arrays as local variable-length arrays, initialises
 * them once with RNN_init() and then calls RNN_fptt() repeat_times times.
 * Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int nc, ns, i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. The cursor members of
           rnn are left unset here; RNN_init() and RNN_fptt() assign them
           before reading them. */
        RNN rnn;
        double wcc[nc*nc], bc[nc], ec[nc], uc[ns*nc], xc[ns*nc];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = wcc;
        rnn.uc = uc;
        rnn.xc = xc;

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}
rnn_opt1.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state for the pointer-increment variant. bc, ec, wcc, uc and xc
 * point at the start of the flat arrays; the *_c, *_p and *_p0 members are
 * working cursors that RNN_init() and RNN_fptt() overwrite while they walk
 * those arrays.
 */
typedef struct __RNN__{
    int num_c;      /* row width of wcc, uc and xc; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double *bc;     /* start of the bc array */
    double *bc_c;   /* cursor into bc */
    double *ec;     /* start of the ec array */
    double *ec_c;   /* cursor into ec */
    double *wcc;    /* start of the weight array */
    double *wcc_c;  /* cursor into wcc */
    double *uc;     /* start of the uc array */
    double *uc_c;   /* cursor: uc element being written */
    double *uc_p;   /* cursor: matching uc element of the previous row */
    double *xc;     /* start of the xc array */
    double *xc_c;   /* cursor: xc element being written */
    double *xc_p0;  /* start of the previous xc row */
    double *xc_p;   /* cursor walking the previous xc row */
} RNN;
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c]
 *              + ec[c] * (bc[c] + sum over c1 of wcc[c][c1] * xc[s-1][c1])
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Variant without any indexing: every array is walked by dereferencing and
 * incrementing a cursor member of *self. Those cursor members are overwritten
 * as a side effect.
 * NOTE(review): xc_p is only assigned inside the c0 loop but is read after
 * it, so with num_c == 0 and num_s > 1 it would be read unset - callers
 * should pass num_c > 0.
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0, c1;
/* accumulator: bias plus the weighted sum for one output element */
double wccxc_bc;

/* row 0: only the tanh; afterwards uc_c/xc_c sit at the start of row 1 */
self->uc_c = self->uc;
self->xc_c = self->xc;
for (c0 = 0; c0 < self->num_c; ++c0){
*self->xc_c = tanh(*self->uc_c);
self->xc_c++;
self->uc_c++;
}
/* "previous row" cursors start at row 0 */
self->uc_p = self->uc;
self->xc_p0 = self->xc;
for (s0 = 1; s0 < self->num_s; ++s0){
/* rewind the per-column parameter cursors for every step */
self->ec_c = self->ec;
self->bc_c = self->bc;
self->wcc_c = self->wcc;
for (c0 = 0; c0 < self->num_c; ++c0){
*self->uc_c = (1 - *self->ec_c) * *self->uc_p;
wccxc_bc = *self->bc_c;
/* walk the previous xc row from its start for each output element */
self->xc_p = self->xc_p0;
for (c1 = 0; c1 < self->num_c; ++c1){
wccxc_bc += *self->wcc_c * *self->xc_p;
self->wcc_c++;
self->xc_p++;
}
/* ec is applied once to the whole accumulated term */
*self->uc_c += *self->ec_c * wccxc_bc;
*self->xc_c = tanh(*self->uc_c);
self->ec_c++;
self->bc_c++;
self->xc_c++;
self->uc_c++;
self->uc_p++;
}
/* xc_p has advanced num_c elements past xc_p0, i.e. to the row just written */
self->xc_p0 = self->xc_p;
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 *
 * Side effect on the cursor members: uc_c and xc_c are left at the start of
 * uc and xc, and wcc_c is advanced by num_c once per initialised row.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int unit, k;

    self->uc_c = self->uc;
    self->xc_c = self->xc;
    self->wcc_c = self->wcc;

    unit = 0;
    while (unit < self->num_c) {
        /* fill the weight row the cursor currently points at */
        k = 0;
        while (k < self->num_c) {
            self->wcc_c[k] = initial;
            ++k;
        }
        self->ec[unit] = initial;
        self->bc[unit] = initial;
        self->uc_c[unit] = 0.0;
        self->xc_c[unit] = 0.0;
        self->wcc_c += self->num_c;
        ++unit;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network arrays as local variable-length arrays, initialises
 * them once with RNN_init() and then calls RNN_fptt() repeat_times times.
 * Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int nc, ns, i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. The cursor members of
           rnn are left unset here; RNN_init() and RNN_fptt() assign them. */
        RNN rnn;
        double wcc[nc*nc], bc[nc], ec[nc], uc[ns*nc], xc[ns*nc];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = wcc;
        rnn.uc = uc;
        rnn.xc = xc;

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}
rnn_opt2.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state. The struct only stores pointers; the arrays themselves are
 * supplied by whoever fills it in. Matrices are flat and row-major with a row
 * width of num_c (see the accessor macros below).
 */
typedef struct __RNN__{
    int num_c;      /* row width of wcc, uc and xc; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double *wcc;    /* num_c * num_c weights */
    double *bc;     /* one value per column index */
    double *ec;     /* one value per column index */
    double *uc;     /* num_s * num_c values */
    double *xc;     /* num_s * num_c values, tanh of the matching uc entry */
} RNN;

/* Element accessors. They expand against a pointer named `self` that must be
   in scope at the point of use. */
#define Wcc(i,j) self->wcc[ self->num_c*(i) + (j) ]
#define Bc(i) self->bc[(i)]
#define Ec(i) self->ec[(i)]
#define Uc(i,j) self->uc[ self->num_c*(i) + (j) ]
#define Xc(i,j) self->xc[ self->num_c*(i) + (j) ]
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c]
 *              + ec[c] * (bc[c] + sum over c1 of wcc[c][c1] * xc[s-1][c1])
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Row 0 of uc is only read; all other rows of uc and all rows of xc are
 * overwritten. Variant that keeps the accessor macros but sums into a local
 * variable and multiplies by ec[c] once, outside the inner loop.
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0, c1;
/* accumulator: bias plus the weighted sum for one output element */
double wccxc_bc;

s0 = 0;
/* row 0: only the tanh, uc[0] is taken as given */
for (c0 = 0; c0 < self->num_c; ++c0){
Xc(s0,c0) = tanh(Uc(s0,c0));
}
for (s0 = 1; s0 < self->num_s; ++s0){
for (c0 = 0; c0 < self->num_c; ++c0){
Uc(s0,c0) = (1 - Ec(c0)) * Uc(s0-1,c0);
wccxc_bc = Bc(c0);
for (c1 = 0; c1 < self->num_c; ++c1){
wccxc_bc += Wcc(c0,c1) * Xc(s0-1,c1);
}
/* ec[c0] is applied once to the whole accumulated term */
Uc(s0,c0) += Ec(c0) * wccxc_bc;
Xc(s0,c0) = tanh(Uc(s0,c0));
}
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int row, col;

    row = 0;
    while (row < self->num_c) {
        col = 0;
        while (col < self->num_c) {
            Wcc(row,col) = initial;
            ++col;
        }
        Ec(row) = initial;
        Bc(row) = initial;
        Uc(0,row) = 0.0;
        Xc(0,row) = 0.0;
        ++row;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network arrays as local variable-length arrays, initialises
 * them once with RNN_init() and then calls RNN_fptt() repeat_times times.
 * Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int nc, ns, i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. */
        RNN rnn;
        double wcc[nc*nc], bc[nc], ec[nc], uc[ns*nc], xc[ns*nc];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = wcc;
        rnn.uc = uc;
        rnn.xc = xc;

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}
rnn_opt3.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state. The struct only stores pointers; the arrays themselves are
 * supplied by whoever fills it in. Matrices are flat and row-major with a row
 * width of num_c (see the accessor macros below).
 */
typedef struct __RNN__{
    int num_c;      /* row width of wcc, uc and xc; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double *wcc;    /* num_c * num_c weights */
    double *bc;     /* one value per column index */
    double *ec;     /* one value per column index */
    double *uc;     /* num_s * num_c values */
    double *xc;     /* num_s * num_c values, tanh of the matching uc entry */
} RNN;

/* Element accessors. They expand against a pointer named `self` that must be
   in scope at the point of use. The expansions are lvalues, so `&Wcc(i,0)`
   yields the address of the first element of row i. */
#define Wcc(i,j) self->wcc[ self->num_c*(i) + (j) ]
#define Bc(i) self->bc[(i)]
#define Ec(i) self->ec[(i)]
#define Uc(i,j) self->uc[ self->num_c*(i) + (j) ]
#define Xc(i,j) self->xc[ self->num_c*(i) + (j) ]
 
/*
 * Dot product of two arrays of doubles.
 *
 * array1, array2: the two inputs, each with at least `num` readable elements.
 *                 They are only read, hence const; `restrict` promises the
 *                 compiler that they are not used to modify each other.
 * num:            number of element pairs to multiply; a value <= 0 gives 0.
 *
 * Returns array1[0]*array2[0] + ... + array1[num-1]*array2[num-1],
 * accumulated in index order starting from 0.
 */
static inline double
ddot(const double * restrict array1, const double * restrict array2, int num)
{
    int i;
    double val=0;
    for (i = 0; i < num; ++i) val += array1[i] * array2[i];
    return val;
}
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c]
 *              + ec[c] * (bc[c] + dot(row c of wcc, row s-1 of xc))
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Row 0 of uc is only read; all other rows of uc and all rows of xc are
 * overwritten. Variant that hands the inner sum to ddot(), passing the
 * addresses of the first elements of the two flat-array rows.
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0;
s0 = 0;
/* row 0: only the tanh, uc[0] is taken as given */
for (c0 = 0; c0 < self->num_c; ++c0){
Xc(s0,c0) = tanh(Uc(s0,c0));
}
for (s0 = 1; s0 < self->num_s; ++s0){
for (c0 = 0; c0 < self->num_c; ++c0){
/* decayed previous value plus ec-scaled (bias + weighted sum) */
Uc(s0,c0) = (1 - Ec(c0)) * Uc(s0-1,c0)
+ Ec(c0) * ( Bc(c0) + ddot(&Wcc(c0,0), &Xc(s0-1,0), self->num_c) );
Xc(s0,c0) = tanh(Uc(s0,c0));
}
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int row, col;

    row = 0;
    while (row < self->num_c) {
        col = 0;
        while (col < self->num_c) {
            Wcc(row,col) = initial;
            ++col;
        }
        Ec(row) = initial;
        Bc(row) = initial;
        Uc(0,row) = 0.0;
        Xc(0,row) = 0.0;
        ++row;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network arrays as local variable-length arrays, initialises
 * them once with RNN_init() and then calls RNN_fptt() repeat_times times.
 * Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int nc, ns, i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. */
        RNN rnn;
        double wcc[nc*nc], bc[nc], ec[nc], uc[ns*nc], xc[ns*nc];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = wcc;
        rnn.uc = uc;
        rnn.xc = xc;

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}
rnn_opt4.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
#include <stdlib.h>
#include <math.h>
 
/*
 * Network state, two-level variant: wcc, uc and xc are arrays of row
 * pointers, so an element is reached as table[row][column]. The struct only
 * stores pointers; the storage is supplied by whoever fills it in.
 */
typedef struct __RNN__{
    int num_c;      /* number of columns per row; also the row count of wcc */
    int num_s;      /* number of rows in uc and xc */
    double **wcc;   /* num_c row pointers */
    double *bc;     /* one value per column index */
    double *ec;     /* one value per column index */
    double **uc;    /* num_s row pointers */
    double **xc;    /* num_s row pointers, tanh of the matching uc entry */
} RNN;

/* Element accessors. They expand against a pointer named `self` that must be
   in scope at the point of use. */
#define Wcc(i,j) self->wcc[i][j]
#define Bc(i) self->bc[i]
#define Ec(i) self->ec[i]
#define Uc(i,j) self->uc[i][j]
#define Xc(i,j) self->xc[i][j]
/* Whole-row accessors: the pointer to row i of wcc / xc. */
#define VWcc(i) self->wcc[i]
#define VXc(i) self->xc[i]
 
/*
 * Dot product of two arrays of doubles.
 *
 * array1, array2: the two inputs, each with at least `num` readable elements.
 *                 They are only read, hence const; `restrict` promises the
 *                 compiler that they are not used to modify each other.
 * num:            number of element pairs to multiply; a value <= 0 gives 0.
 *
 * Returns array1[0]*array2[0] + ... + array1[num-1]*array2[num-1],
 * accumulated in index order starting from 0.
 */
static inline double
ddot(const double * restrict array1, const double * restrict array2, int num)
{
    int i;
    double val=0;
    for (i = 0; i < num; ++i) val += array1[i] * array2[i];
    return val;
}
 
/*
 * Recompute every row of uc and xc from row 0 of uc and the parameters:
 *
 *   xc[0][c] = tanh(uc[0][c])
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c]
 *              + ec[c] * (bc[c] + dot(row c of wcc, row s-1 of xc))
 *   xc[s][c] = tanh(uc[s][c])            for s = 1 .. num_s-1
 *
 * Row 0 of uc is only read; all other rows of uc and all rows of xc are
 * overwritten. Variant that hands the inner sum to ddot(), passing the row
 * pointers stored in the wcc and xc tables.
 * NOTE(review): "fptt" presumably stands for forward propagation through
 * time - confirm.
 */
void RNN_fptt(RNN *self)
{
int s0, c0;

s0 = 0;
/* row 0: only the tanh, uc[0] is taken as given */
for (c0 = 0; c0 < self->num_c; ++c0){
Xc(s0,c0) = tanh(Uc(s0,c0));
}
for (s0 = 1; s0 < self->num_s; ++s0){
for (c0 = 0; c0 < self->num_c; ++c0){
/* decayed previous value plus ec-scaled (bias + weighted sum) */
Uc(s0,c0) = (1 - Ec(c0)) * Uc(s0-1,c0)
+ Ec(c0) * ( Bc(c0) + ddot(VWcc(c0), VXc(s0-1), self->num_c) );
Xc(s0,c0) = tanh(Uc(s0,c0));
}
}
}
 
/*
 * Set the parameters and the first row of state to fixed values:
 * every wcc, ec and bc entry becomes 0.1, and row 0 of uc and xc becomes 0.
 * Rows 1 and up of uc and xc are not touched here.
 */
void RNN_init(RNN *self)
{
    const double initial = 0.1;
    int row, col;

    row = 0;
    while (row < self->num_c) {
        col = 0;
        while (col < self->num_c) {
            Wcc(row,col) = initial;
            ++col;
        }
        Ec(row) = initial;
        Bc(row) = initial;
        Uc(0,row) = 0.0;
        Xc(0,row) = 0.0;
        ++row;
    }
}
 
/*
 * Benchmark driver.
 *
 * Usage: <program> num_c num_s repeat_times
 *
 * Allocates the network as local variable-length 2-D arrays plus one table of
 * row pointers per matrix, initialises it once with RNN_init() and then calls
 * RNN_fptt() repeat_times times. Nothing is printed.
 *
 * Returns 0 on success and EXIT_FAILURE when an argument is missing or when
 * num_c or num_s is not a positive number.
 */
int main(int argc, char *argv[])
{
    int c, nc;
    int s, ns;
    int i, repeat_times;

    /* argv[1]..argv[3] are read below; on a short command line they would be
       null pointers (or out of range) and atoi() must not be given one. */
    if (argc < 4) {
        return EXIT_FAILURE;
    }
    nc = atoi(argv[1]);
    ns = atoi(argv[2]);
    repeat_times = atoi(argv[3]);

    /* The arrays below are variable-length; a length that is not greater than
       zero is undefined behaviour. atoi() yields 0 for non-numeric text, so
       this also rejects garbage arguments. */
    if (nc <= 0 || ns <= 0) {
        return EXIT_FAILURE;
    }

    {
        /* NOTE(review): these arrays live on the stack, so very large
           num_c / num_s values can still exhaust it. */
        RNN rnn;
        double wcc[nc][nc], bc[nc], ec[nc], uc[ns][nc], xc[ns][nc];
        double *pwcc[nc], *puc[ns], *pxc[ns];

        rnn.num_s = ns;
        rnn.num_c = nc;
        rnn.ec = ec;
        rnn.bc = bc;
        rnn.wcc = pwcc;
        rnn.uc = puc;
        rnn.xc = pxc;
        /* point every table entry at the matching row of the 2-D array */
        for (c = 0; c < nc; ++c) rnn.wcc[c] = wcc[c];
        for (s = 0; s < ns; ++s) rnn.uc[s] = uc[s];
        for (s = 0; s < ns; ++s) rnn.xc[s] = xc[s];

        RNN_init(&rnn);
        for (i = 0; i < repeat_times; ++i){
            RNN_fptt(&rnn);
        }
    }

    return 0;
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.