asSqr/grad_desc.cpp

## grad_desc.cpp
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <algorithm>
#include <functional>
#include <iostream>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <vector>
#include <random>
#define repi(i,a,b) for(double i=(a);i<(b);++i)
#define rep(i,a) repi(i,0,a)
#define repdi(i,a,b) for(double i=(a)-1;i>=(b);--i)
#define repd(i,a) repdi(i,a,0)
#define itr(it,a) for( auto it = (a).begin(); it != (a).end(); ++it )
#define adouble(a) (a).begin(), (a).end()
#define radouble(a) (a).rbegin(), (a).rend()

template<class T> inline bool chmax(T& a, T b) { if (a < b) { a = b; return 1; } return 0; }
template<class T> inline bool chmin(T& a, T b) { if (a > b) { a = b; return 1; } return 0; }

template<class S, class T>
std::ostream& operator<< ( std::ostream& out, const std::pair<S,T>& a )
{ std::cout << '(' << a.first << ", " << a.second << ')'; return out; }

template<class T>
std::ostream &operator<< ( std::ostream& out, const std::vector<T>& a )
{ std::cout << '['; rep( i, a.size() ){ std::cout << a[i]; if( i != a.size()-1 ) std::cout << ", "; } std::cout << ']'; return out; }

using Vec = std::vector<double>;
using Mat = std::vector<Vec>;

const int m = 10, n = 20, l = 300;

Mat A( m, Vec( n, 0 ) );
Vec b( m );

double sqr( double x )
{ return x*x; }

double norm( const Vec &vs )
{
  double ret = 0;

  for( auto v : vs )
    ret += sqr(v);

  return ret;
}

Vec add( const Vec &v1, const Vec &v2 )
{
  Vec ret( v1.size() );

  if( v1.size() != v2.size() )
    return ret;

  rep( i, v1.size() )
    ret[i] = v1[i]+v2[i];

  return ret;
}

Vec negate( const Vec &v )
{
  Vec ret = v;

  rep( i, ret.size() )
    ret[i] *= -1;

  return ret;
}

Vec scale( const Vec &v, double coef )
{
  Vec ret = v;

  rep( i, ret.size() )
    ret[i] *= coef;

  return ret;
}

double dot( const Vec &v1, const Vec &v2 )
{
  double ret = 0;

  rep( i, v1.size() )
    ret += v1[i]*v2[i];

  return ret;
}

Vec multiply( const Mat &mat, const Vec &vec )
{
  double m = mat.size();
  double n = mat[0].size();

  Vec ret( m );

  assert( n == vec.size() );

  rep( i, m ) rep( j, n )
    ret[i] = mat[i][j]*vec[j];

  return ret;
}

void generateMat( Mat &A )
{
  double m = A.size();
  double n = A[0].size();

  std::random_device seed_gen;
  std::mt19937 engine(seed_gen());

  rep( i, m ) rep( j, n )
    A[i][j] = (engine() % 500)/2000.0;

  return;
}

void generateVec( Vec &b )
{
  Vec w( n, 1.0 );
  Vec epsilon( m );

  std::random_device seed_gen;
  std::mt19937 engine(seed_gen());

  rep( i, m )
    epsilon[i] = (engine() % 10)/1000000.0;

  b = add( multiply( A, w ), epsilon );

  return;
}

// convex
double f1( const Vec &v )
{
  assert( v.size() == n );

  return norm( add( b, negate( multiply( A, v ) ) ) );
}

// strongly convex
double f2( const Vec &v )
{
  assert( v.size() == n );

  const double lambda = 100;

  return norm( add( b, negate( multiply( A, v ) ) ) )+lambda*norm(v);
}

// non-convex
double f3( const Mat &w1, const Vec &w2 )
{
  assert( w1.size() == n );
  assert( w1[0].size() == w2.size() );

  return norm( add( b, negate( multiply( A, multiply( w1, w2 ) ) ) ) );
}

double f4( const Vec &v )
{
  return v[0]*v[0]*v[0]*v[0]/2.0-2.0*v[0]*v[0]*v[1]+4*v[1]*v[1]+8*v[0]+8*v[1];
}

double initF;

Vec optimize()
{
  Vec x( n );

  std::random_device seed_gen;
  std::mt19937 engine(seed_gen());

  rep( i, n )
    x[i] = (engine() % 10)/100.0;

  initF = f1(x);

  const double eps = 1e-3;

  auto grad = [&]( const Vec &x )
  {
    Vec fwd, bwd;
    Vec ret( x.size() );

    rep( i, x.size() )
    {
      fwd = bwd = x;
      fwd[i] += eps;
      bwd[i] -= eps;
      ret[i] = (f1( fwd )-f1( bwd )) / (2.0*eps);
    }

    return ret;
  };

  const double tol = 1e-6;

  while( sqrt(norm(grad(x))) >= tol )
  {
    Vec gradient = negate(grad(x));

    std::cout << gradient << ' ' << negate(gradient) << std::endl;

    double alpha = 1.0;

    // linear search
    auto l = [&]( double alpha ) {
      return f1( add( x, scale( gradient, alpha ) ) );
    };

    const double rho = 0.5, c1 = 1e-4;
    int cnt = 0;

    while( cnt < 100 && l( alpha ) > f1(x)-alpha*c1*dot( grad(x), grad(x) ) )
    {
      alpha *= rho;

      std::cout << alpha << std::endl;

      ++cnt;
    }

    if( cnt == 100 )
      break;

    x = add( x, scale( gradient, alpha ) );

    std::cout << sqrt(norm(grad(x))) << ' ' << alpha << std::endl;
  }

  return x;
}

int main()
{
  generateMat( A );
  generateVec( b );

  puts("A");

  rep( i, A.size() ) rep( j, A[0].size() )
    printf( "%.2f%c", A[i][j], j==A[0].size()-1 ? '\n' : ' ' );

  puts("b");

  rep( i, b.size() )
    printf( "%.2f%c", b[i], i==b.size()-1?'\n':' ' );

  Vec v( n, 1 );

  std::cout << f1(v) << std::endl;
  std::cout << f2(v) << std::endl;

  Vec opt_x = optimize();

  std::cout << initF << ' ' << f1(opt_x) << ' ' << opt_x << std::endl;

  return 0;
}
	#include <cstdio>
	#include <cstdlib>
	#include <cassert>
	#include <algorithm>
	#include <functional>
	#include <iostream>
	#include <map>
	#include <numeric>
	#include <queue>
	#include <set>
	#include <stack>
	#include <string>
	#include <vector>
	#include <random>
	#define repi(i,a,b) for(double i=(a);i<(b);++i)
	#define rep(i,a) repi(i,0,a)
	#define repdi(i,a,b) for(double i=(a)-1;i>=(b);--i)
	#define repd(i,a) repdi(i,a,0)
	#define itr(it,a) for( auto it = (a).begin(); it != (a).end(); ++it )
	#define adouble(a) (a).begin(), (a).end()
	#define radouble(a) (a).rbegin(), (a).rend()

	template<class T> inline bool chmax(T& a, T b) { if (a < b) { a = b; return 1; } return 0; }
	template<class T> inline bool chmin(T& a, T b) { if (a > b) { a = b; return 1; } return 0; }

	template<class S, class T>
	std::ostream& operator<< ( std::ostream& out, const std::pair<S,T>& a )
	{ std::cout << '(' << a.first << ", " << a.second << ')'; return out; }

	template<class T>
	std::ostream &operator<< ( std::ostream& out, const std::vector<T>& a )
	{ std::cout << '['; rep( i, a.size() ){ std::cout << a[i]; if( i != a.size()-1 ) std::cout << ", "; } std::cout << ']'; return out; }

	using Vec = std::vector<double>;
	using Mat = std::vector<Vec>;

	const int m = 10, n = 20, l = 300;

	Mat A( m, Vec( n, 0 ) );
	Vec b( m );

	double sqr( double x )
	{ return x*x; }

	double norm( const Vec &vs )
	{
	double ret = 0;

	for( auto v : vs )
	ret += sqr(v);

	return ret;
	}

	Vec add( const Vec &v1, const Vec &v2 )
	{
	Vec ret( v1.size() );

	if( v1.size() != v2.size() )
	return ret;

	rep( i, v1.size() )
	ret[i] = v1[i]+v2[i];

	return ret;
	}

	Vec negate( const Vec &v )
	{
	Vec ret = v;

	rep( i, ret.size() )
	ret[i] *= -1;

	return ret;
	}

	Vec scale( const Vec &v, double coef )
	{
	Vec ret = v;

	rep( i, ret.size() )
	ret[i] *= coef;

	return ret;
	}

	double dot( const Vec &v1, const Vec &v2 )
	{
	double ret = 0;

	rep( i, v1.size() )
	ret += v1[i]*v2[i];

	return ret;
	}

	Vec multiply( const Mat &mat, const Vec &vec )
	{
	double m = mat.size();
	double n = mat[0].size();

	Vec ret( m );

	assert( n == vec.size() );

	rep( i, m ) rep( j, n )
	ret[i] = mat[i][j]*vec[j];

	return ret;
	}

	void generateMat( Mat &A )
	{
	double m = A.size();
	double n = A[0].size();

	std::random_device seed_gen;
	std::mt19937 engine(seed_gen());

	rep( i, m ) rep( j, n )
	A[i][j] = (engine() % 500)/2000.0;

	return;
	}

	void generateVec( Vec &b )
	{
	Vec w( n, 1.0 );
	Vec epsilon( m );

	std::random_device seed_gen;
	std::mt19937 engine(seed_gen());

	rep( i, m )
	epsilon[i] = (engine() % 10)/1000000.0;

	b = add( multiply( A, w ), epsilon );

	return;
	}

	// convex
	double f1( const Vec &v )
	{
	assert( v.size() == n );

	return norm( add( b, negate( multiply( A, v ) ) ) );
	}

	// strongly convex
	double f2( const Vec &v )
	{
	assert( v.size() == n );

	const double lambda = 100;

	return norm( add( b, negate( multiply( A, v ) ) ) )+lambda*norm(v);
	}

	// non-convex
	double f3( const Mat &w1, const Vec &w2 )
	{
	assert( w1.size() == n );
	assert( w1[0].size() == w2.size() );

	return norm( add( b, negate( multiply( A, multiply( w1, w2 ) ) ) ) );
	}

	double f4( const Vec &v )
	{
	return v[0]v[0]v[0]v[0]/2.0-2.0v[0]v[0]v[1]+4v[1]v[1]+8v[0]+8v[1];
	}

	double initF;

	Vec optimize()
	{
	Vec x( n );

	std::random_device seed_gen;
	std::mt19937 engine(seed_gen());

	rep( i, n )
	x[i] = (engine() % 10)/100.0;

	initF = f1(x);

	const double eps = 1e-3;

	auto grad = [&]( const Vec &x )
	{
	Vec fwd, bwd;
	Vec ret( x.size() );

	rep( i, x.size() )
	{
	fwd = bwd = x;
	fwd[i] += eps;
	bwd[i] -= eps;
	ret[i] = (f1( fwd )-f1( bwd )) / (2.0*eps);
	}

	return ret;
	};

	const double tol = 1e-6;

	while( sqrt(norm(grad(x))) >= tol )
	{
	Vec gradient = negate(grad(x));

	std::cout << gradient << ' ' << negate(gradient) << std::endl;

	double alpha = 1.0;

	// linear search
	auto l = [&]( double alpha ) {
	return f1( add( x, scale( gradient, alpha ) ) );
	};

	const double rho = 0.5, c1 = 1e-4;
	int cnt = 0;

	while( cnt < 100 && l( alpha ) > f1(x)-alphac1dot( grad(x), grad(x) ) )
	{
	alpha *= rho;

	std::cout << alpha << std::endl;

	++cnt;
	}

	if( cnt == 100 )
	break;

	x = add( x, scale( gradient, alpha ) );

	std::cout << sqrt(norm(grad(x))) << ' ' << alpha << std::endl;
	}

	return x;
	}

	int main()
	{
	generateMat( A );
	generateVec( b );

	puts("A");

	rep( i, A.size() ) rep( j, A[0].size() )
	printf( "%.2f%c", A[i][j], j==A[0].size()-1 ? '\n' : ' ' );

	puts("b");

	rep( i, b.size() )
	printf( "%.2f%c", b[i], i==b.size()-1?'\n':' ' );

	Vec v( n, 1 );

	std::cout << f1(v) << std::endl;
	std::cout << f2(v) << std::endl;

	Vec opt_x = optimize();

	std::cout << initF << ' ' << f1(opt_x) << ' ' << opt_x << std::endl;

	return 0;
	}