@botev
Last active June 17, 2016 20:54
//
// Created by alex on 17/06/16.
//
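//
// Numerically stable binary cross-entropy, written with the softplus
// identity. For s(p) = 1 / (1 + exp(-p)) and softplus(x) = log(1 + exp(x)):
//
//     -log(s(p))     = softplus(-p)
//     -log(1 - s(p)) = softplus(p)
//
// so the elementwise negative log-likelihood for targets t is
//
//     E(p, t) = t * softplus(-p) + (1 - t) * softplus(p),
//
// with dE/dp = s(p) - t and diagonal Hessian s(p) * (1 - s(p)). The class
// below additionally divides everything by the number of columns of targets.
//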
#include "arrayfire.h"
#include "iomanip"
class BinaryCrossEntropy {
public:
    // Loss: sum of t * softplus(-p) + (1 - t) * softplus(p), scaled by the
    // number of columns. softplus is computed stably: for large arguments
    // log1p(exp(x)) would overflow, but there softplus(x) equals x to
    // machine precision, so the raw argument is returned instead.
    inline af::array func(af::array prediction, af::array targets) {
        // r1 = softplus(-prediction)
        // r2 = softplus(prediction)
        af::array r1 = af::select((-prediction) >= 50.0, (-prediction),
                                  af::log1p(af::exp((-prediction))));
        af::array r2 = af::select(prediction >= 50.0, prediction,
                                  af::log1p(af::exp(prediction)));
        return af::sum(af::flat(targets * r1 + (1.0 - targets) * r2) / targets.dims(1));
    }

    // dE/dp = sigmoid(p) - t, scaled like the loss.
    inline af::array derivative(af::array prediction, af::array targets) {
        return (1.0 / (1.0 + af::exp((-prediction))) - targets) / targets.dims(1);
    }

    // Diagonal of the Hessian: sigmoid(p) * (1 - sigmoid(p)), scaled like the loss.
    inline af::array diag_hess(af::array prediction, af::array targets) {
        af::array sigm = 1.0 / (1.0 + af::exp((-prediction)));
        return sigm * (1.0 - sigm) / targets.dims(1);
    }
};
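
// Gradient check: perturb one entry of p at a time and compare the analytic
// derivative against the central difference
//
//     (E(p + eps, t) - E(p - eps, t)) / (2 * eps),
//
// whose truncation error is O(eps^2).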
void check_grad_double(double eps) {
    BinaryCrossEntropy b = BinaryCrossEntropy();
    af::array p = af::randn(20, 30, f64);
    af::array t = af::randu(20, 30, f64);
    af::array dE = b.derivative(p, t);
    af::array loss;
    std::cout << "Numerical computation" << std::endl;
    for (int j = 0; j < 20; ++j) {
        for (int k = 0; k < 30; ++k) {
            // scalar<T>() reads a single value back to the host; unlike
            // host<T>(), it does not return an allocation the caller
            // would have to free.
            double value = af::array(p(j, k)).scalar<double>();
            p(j, k) = value + eps;
            loss = b.func(p, t);
            double loss_plus = loss.scalar<double>();
            p(j, k) = value - eps;
            loss = b.func(p, t);
            double loss_minus = loss.scalar<double>();
            p(j, k) = value;
            double diff = (loss_plus - loss_minus) / (2 * eps);
            std::cout << std::fixed << std::setprecision(10)
                      << diff << " - " << af::array(dE(j, k)).scalar<double>() << std::endl;
        }
    }
}
// Same check in single precision. With f32 arithmetic and eps = 1e-5 the
// finite differences are much noisier, so larger discrepancies are expected.
void check_grad_float(double eps) {
    BinaryCrossEntropy b = BinaryCrossEntropy();
    af::array p = af::randn(20, 30);
    af::array t = af::randu(20, 30);
    af::array dE = b.derivative(p, t);
    af::array loss;
    std::cout << "Numerical computation" << std::endl;
    for (int j = 0; j < 20; ++j) {
        for (int k = 0; k < 30; ++k) {
            float value = af::array(p(j, k)).scalar<float>();
            p(j, k) = value + eps;
            loss = b.func(p, t);
            double loss_plus = loss.scalar<float>();
            p(j, k) = value - eps;
            loss = b.func(p, t);
            double loss_minus = loss.scalar<float>();
            p(j, k) = value;
            double diff = (loss_plus - loss_minus) / (2 * eps);
            std::cout << std::fixed << std::setprecision(10)
                      << diff << " - " << af::array(dE(j, k)).scalar<float>() << std::endl;
        }
    }
}
int main() {
    check_grad_double(1e-5);
    check_grad_float(1e-5);
    return 0;
}
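
// A build sketch, assuming ArrayFire is installed system-wide; link against
// -lafcpu, -lafcuda, or -lafopencl instead of the unified -laf backend to
// pin a specific device:
//
//     g++ -std=c++11 bce_gradient_check.cpp -o bce_gradient_check -laf
//
// (bce_gradient_check.cpp is a placeholder name for this file.)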