This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// dz2 = dyhat * W3.T * relu'(a2)
// Back-propagates the output error through W3 and gates it with the ReLU
// derivative of the second hidden activation.
// NOTE(review): the trailing `*` is presumably an element-wise vector
// product supplied by an operator overload defined elsewhere — confirm.
vector<float> dz2 = dot(dyhat, transpose( &W3[0], 64, 10 ), BATCH_SIZE, 10, 64) * reluPrime(a2);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// dW2 = a1.T * dz2
// Gradient for W2: the transposed first hidden activation multiplied by the
// error term dz2.
vector<float> dW2 = dot(transpose( &a1[0], BATCH_SIZE, 128 ), dz2, 128, BATCH_SIZE, 64);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// dW3 = a2.T * dyhat
// Gradient for W3: the transposed second hidden activation multiplied by the
// output error dyhat.
vector<float> dW3 = dot(transpose( &a2[0], BATCH_SIZE, 64 ), dyhat, 64, BATCH_SIZE, 10);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// dyhat = yhat - b_y
// Output-layer error: the prediction minus b_y (presumably the labels of the
// current batch — confirm against the caller).
vector<float> dyhat = yhat - b_y;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rectified linear unit, applied element-wise.
//
// Returns a new vector with the same length as `z`: every entry that compares
// less than zero is replaced by 0, every other entry is copied unchanged
// (a NaN does not compare less than zero, so it is passed through as-is).
// An empty input yields an empty output.
std::vector<float> relu(const std::vector<float>& z) {
    const std::size_t count = z.size();  // keep the unsigned size type; no narrowing to int
    std::vector<float> output;
    output.reserve(count);  // one allocation up front instead of repeated regrowth
    for (std::size_t i = 0; i < count; ++i) {
        output.push_back(z[i] < 0.0f ? 0.0f : z[i]);
    }
    return output;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Feed forward.
// The dimension arguments passed to dot() suggest layer widths of
// 784 -> 128 -> 64 -> 10 for a batch of BATCH_SIZE rows
// (assumes dot(A, B, rows, inner, cols) — TODO confirm against its definition).
vector<float> a1 = relu(dot( b_X, W1, BATCH_SIZE, 784, 128 ));
vector<float> a2 = relu(dot( a1, W2, BATCH_SIZE, 128, 64 ));
vector<float> yhat = softmax(dot( a2, W3, BATCH_SIZE, 64, 10 ), 10);

// Back propagation.
// dyhat = yhat - b_y
vector<float> dyhat = yhat - b_y;
// dW3 = a2.T * dyhat
vector<float> dW3 = dot(transpose( &a2[0], BATCH_SIZE, 64 ), dyhat, 64, BATCH_SIZE, 10);
// dz2 = dyhat * W3.T * relu'(a2)
vector<float> dz2 = dot(dyhat, transpose( &W3[0], 64, 10 ), BATCH_SIZE, 10, 64) * reluPrime(a2);
// dW2 = a1.T * dz2
vector<float> dW2 = dot(transpose( &a1[0], BATCH_SIZE, 128 ), dz2, 128, BATCH_SIZE, 64);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the binary produced by the nvcc build step.
./perceptron
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build for compute capability 5.0 with relocatable device code enabled and the
# CUDA device runtime library linked in; the output binary is ./perceptron.
nvcc -arch=sm_50 -rdc=true -lcudadevrt onehiddenlayerperceptron.cu -o perceptron
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Prediction[0] : 0.060997 True Value[0] : 0.000000 Error[0] : 0.060997
Prediction[1] : 0.076193 True Value[1] : 0.000000 Error[1] : 0.076193
Prediction[2] : 0.927551 True Value[2] : 1.000000 Error[2] : -0.072449
Prediction[3] : 0.918263 True Value[3] : 1.000000 Error[3] : -0.081737
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NOTE(review): judging by its name, dDot_m1T_m2 presumably multiplies the
// transpose of its first matrix by its second; which argument receives the
// result and what the dimension arguments mean cannot be determined from this
// snippet — confirm against the function's definition.
dDot_m1T_m2( X, l_1_d, W0, X_h, X_w, l1_w );