Heiko is worried that we will lose it :)
// Immutable features
// Linear model use cases:
//  - dot products: pairs of examples
//  - covariance matrix: there's a Features operator => CovarView => matrix
class Features {
    Features() {}
    ...
    // Stacks the transformer lazily and returns a copy of the features.
    Features transformed_by(Transformer t) const;
    // This evaluates the stacked transformers over the features
    // and returns a copy of the features.
    Features cached() const;
protected:
    void add_flag(int flag) {
        flags |= flag;
    }
    int get_flags();
};
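A minimal runnable sketch of the lazy stacking idea, assuming a Transformer can be modeled as a plain function over a matrix (the real interface would differ):

#include <functional>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical stand-ins for the real types.
using Matrix = std::vector<std::vector<double>>;
using Transformer = std::function<Matrix(const Matrix&)>;

class Features {
public:
    explicit Features(std::shared_ptr<const Matrix> data) : data(std::move(data)) {}

    // Immutable: stacks one more transformer and returns a copy;
    // nothing is evaluated yet.
    Features transformed_by(Transformer t) const {
        Features copy(*this);
        copy.pipeline.push_back(std::move(t));
        return copy;
    }

    // Evaluates the stacked transformers once and returns features
    // backed by the materialized result, with an empty pipeline.
    Features cached() const {
        Matrix result = *data;
        for (const auto& t : pipeline)
            result = t(result);
        return Features(std::make_shared<const Matrix>(std::move(result)));
    }

private:
    std::shared_ptr<const Matrix> data;
    std::vector<Transformer> pipeline;
};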
class DotFeatures : Features
{
    double inner(iterator, iterator) = 0;
    double inner(iterator, Vector) = 0;
    // Generic batched fallbacks in terms of the single-example versions:
    double inner(batch, Vector w) { for each in batch: collect(inner(each, w)) }
    double inner(batch l, batch r) { for each_l in l: for each_r in r: collect(inner(each_l, each_r)) }
};
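For illustration, the fallback batched inner products spelled out in plain C++, with std::vector standing in for iterators and batches (names here are not the real API):

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;

// Single-example dot product, the primitive everything falls back to.
double inner(const Vector& x, const Vector& w) {
    double s = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i)
        s += x[i] * w[i];
    return s;
}

// Batched fallback: loop over the batch and collect per-example results.
std::vector<double> inner(const std::vector<Vector>& batch, const Vector& w) {
    std::vector<double> out;
    out.reserve(batch.size());
    for (const auto& x : batch)
        out.push_back(inner(x, w));
    return out;
}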
class DenseFeatures : DotFeatures
{
    // These overrides are more efficient due to the dense storage:
    double inner(batch, Vector w) { linalg::do(matrix(batch) * w) }
    double inner(batch l, batch r) { linalg::do(matrix(l) * matrix(r)) }
    Matrix matrix(batch) {
        // return a submatrix if possible (batch is contiguous and no
        // preprocessors left), or create a new matrix
    }
    iterator begin() {
        // magic choosing between ranges and indices
    }
    vector<Range> ranges;
    vector<Index> indices;
    shared<Matrix> data;
};
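One possible reading of the begin() magic, as a sketch: prefer the explicit index list when one is set, otherwise walk the contiguous ranges. IndexIterator and its members are hypothetical names, and non-empty ranges are assumed:

#include <cstddef>
#include <vector>

struct Range { std::size_t begin, end; }; // half-open [begin, end)

// Yields example indices either from contiguous ranges or from an
// explicit index list, so callers never see which representation
// backs the subset.
class IndexIterator {
public:
    IndexIterator(const std::vector<Range>& ranges,
                  const std::vector<std::size_t>& indices)
        : ranges_(ranges), indices_(indices) {}

    bool has_more() const {
        return !indices_.empty() ? pos_ < indices_.size()
                                 : range_ < ranges_.size();
    }

    std::size_t next() {
        if (!indices_.empty())
            return indices_[pos_++];
        std::size_t value = ranges_[range_].begin + offset_;
        if (++offset_ == ranges_[range_].end - ranges_[range_].begin) {
            ++range_;
            offset_ = 0;
        }
        return value;
    }

private:
    const std::vector<Range>& ranges_;
    const std::vector<std::size_t>& indices_;
    std::size_t pos_ = 0, range_ = 0, offset_ = 0;
};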
class StreamingDotFeatures {
};
df = DataFrame();
f = Features(df).transformed_by(Mean).transformed_by(Normalize).cached();
class Covariance {
    Covariance(some<Features> f, Options options) { dot_f = as_dot_features(f); }
    Matrix full_matrix() const { covariance = zeros(); ... call dot_f.outer_into(covariance, each, other) or dot_f.batched_dot ... }
    Matrix diagonal() const { ... compute a few of the entries using the same procedure ... }
    some<DotFeatures> dot_f;
};
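A sketch of what full_matrix() boils down to, with plain vectors standing in for DotIterators and centered examples assumed: accumulate the outer products x * x^T and normalize:

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;
using Matrix = std::vector<Vector>;

// Illustrative only: the iterator-based API above would drive this
// accumulation via outer_into / batched_dot.
Matrix covariance_full_matrix(const std::vector<Vector>& examples) {
    const std::size_t d = examples.empty() ? 0 : examples.front().size();
    Matrix cov(d, Vector(d, 0.0));
    for (const auto& x : examples)
        for (std::size_t i = 0; i < d; ++i)
            for (std::size_t j = 0; j < d; ++j)
                cov[i][j] += x[i] * x[j];
    // Normalize by the number of examples (features assumed centered).
    for (auto& row : cov)
        for (auto& v : row)
            v /= static_cast<double>(examples.size());
    return cov;
}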
class DotIterator {
    double inner(DotIterator other) const {
        return dot_f->inner(current, other.current);
    }
    double inner(Vector other) const;
    void add(Vector other, double alpha);
    void auto_outer_into(Matrix outers) const;
    void outer_into(DotIterator other, Matrix outer) const;
    DotFeatures dot_f;
    iterator current;
};
class BatchedDotIterator {
    BatchedDotIterator(DotIterator);
    // same interface as above, but batched
};
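The batching idea behind BatchedDotIterator, sketched over plain index lists (all names hypothetical): hand out fixed-size chunks so DenseFeatures can serve each chunk with one matrix product:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Wraps a flat sequence of example indices and yields them in
// fixed-size batches.
class BatchedIndexIterator {
public:
    BatchedIndexIterator(std::vector<std::size_t> indices, std::size_t batch_size)
        : indices_(std::move(indices)), batch_size_(batch_size) {}

    bool has_more() const { return pos_ < indices_.size(); }

    std::vector<std::size_t> next_batch() {
        std::size_t end = std::min(pos_ + batch_size_, indices_.size());
        std::vector<std::size_t> batch(indices_.begin() + pos_, indices_.begin() + end);
        pos_ = end;
        return batch;
    }

private:
    std::vector<std::size_t> indices_;
    std::size_t batch_size_;
    std::size_t pos_ = 0;
};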
SGDRegressor {
    // NO STATE BUT W
    SGDRegressor()
    train(Features f) {
        // .. check centered ..
        // warm-start?
        auto w = initial_w(f);
        auto iterator = BatchedDotIterator(f);
        auto alpha = get(ALPHA);
        while (not stopped and iterator.has_more()) {
            iterator.add(w, alpha);
            iterator++;
        }
        set(W, w);
    }
}
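For reference, the kind of update iterator.add(w, alpha) stands for, written out as a runnable least-squares SGD loop over raw vectors (an illustration only, not the iterator-based API above):

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;

// Stateless apart from w: per example, w += alpha * (y_i - <w, x_i>) * x_i.
Vector sgd_train(const std::vector<Vector>& X, const Vector& y,
                 double alpha, std::size_t epochs) {
    Vector w(X.empty() ? 0 : X.front().size(), 0.0);
    for (std::size_t e = 0; e < epochs; ++e) {
        for (std::size_t i = 0; i < X.size(); ++i) {
            double pred = 0.0;
            for (std::size_t j = 0; j < w.size(); ++j)
                pred += w[j] * X[i][j];
            double err = y[i] - pred;
            for (std::size_t j = 0; j < w.size(); ++j)
                w[j] += alpha * err * X[i][j];
        }
    }
    return w;
}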
sgd = SGDRegressor();
f = DenseFeatures(pd.read_csv('concrete.csv'));
sgd.set("warm_start", Vector::RandomLaplace(f.n_features()));
splitting = BinarySplitSubset(f);
train, test = splitting.first(), splitting.second();
sgd.train(train);
sgd.apply(test);
f = StreamingFeatures(HdfsStream("hdfs://my_porno_data"));
sgd.train(HeadSubset(f, 1e6));
LinearModel {
    // NO STATE BUT W
    LinearModel()
    Vector initial_w(Features f);
    train(Features f) {
        // assert the features are centered
        assert(f.get_flags() & CENTERED);
        // we do not expose the type of matrix/vector at the model level;
        // linalg should find it out, as we have opaque Matrix/Vector
        // classes which are type agnostic.
        Matrix covariance = Covariance(f).full_matrix();
        linalg::add_diag(covariance, get(LAMBDA));
        set(W, linalg::cholesky_solve(covariance, y));
    }
    Tag<Vector> LAMBDA;
    Tag<Vector> W;
}
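A sketch of what linalg::cholesky_solve could do underneath for the ridge system (Cov + lambda * I) w = b, assuming a plain double-based Matrix (a real backend would dispatch to an optimized, type-aware routine):

#include <cmath>
#include <cstddef>
#include <vector>

using Vector = std::vector<double>;
using Matrix = std::vector<Vector>;

// Factor A = L L^T in place, then do two triangular solves.
Vector cholesky_solve(Matrix A, Vector b) {
    const std::size_t n = A.size();
    // In-place Cholesky: the lower triangle of A becomes L.
    for (std::size_t j = 0; j < n; ++j) {
        for (std::size_t k = 0; k < j; ++k)
            A[j][j] -= A[j][k] * A[j][k];
        A[j][j] = std::sqrt(A[j][j]);
        for (std::size_t i = j + 1; i < n; ++i) {
            for (std::size_t k = 0; k < j; ++k)
                A[i][j] -= A[i][k] * A[j][k];
            A[i][j] /= A[j][j];
        }
    }
    // Forward solve L z = b (z overwrites b).
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t k = 0; k < i; ++k)
            b[i] -= A[i][k] * b[k];
        b[i] /= A[i][i];
    }
    // Backward solve L^T w = z (w overwrites b).
    for (std::size_t i = n; i-- > 0;) {
        for (std::size_t k = i + 1; k < n; ++k)
            b[i] -= A[k][i] * b[k];
        b[i] /= A[i][i];
    }
    return b;
}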
Batch optimisation is taken care of automagically:
model.train(SubsetView(features))
Kernel use cases (a hedged interface sketch follows the list):
1) Kernel ridge regression (one needs to compute the whole kernel matrix)
2) Kernel SVM (compute the kernel for a pair, and on batches to apply)
3) Streaming MMD (compute the kernel for a pair from two different streams)
4) Combined kernel (all of the above)
*) Kernel matrix precomputation
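A sketch of a kernel interface that would cover all of the cases above: single pairs for SVMs and streaming MMD, a full or precomputed matrix for kernel ridge regression. Names and signatures are illustrative, not a real library API:

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;

struct Kernel {
    virtual ~Kernel() = default;

    // Use cases 2 and 3: one pair, possibly from two different sources.
    virtual double compute(const Vector& a, const Vector& b) const = 0;

    // Use cases 1 and *): the full (or precomputed) kernel matrix,
    // built on top of the pairwise primitive.
    std::vector<Vector> full_matrix(const std::vector<Vector>& lhs,
                                    const std::vector<Vector>& rhs) const {
        std::vector<Vector> K(lhs.size(), Vector(rhs.size()));
        for (std::size_t i = 0; i < lhs.size(); ++i)
            for (std::size_t j = 0; j < rhs.size(); ++j)
                K[i][j] = compute(lhs[i], rhs[j]);
        return K;
    }
};

// The simplest concrete instance: a linear kernel.
struct LinearKernel : Kernel {
    double compute(const Vector& a, const Vector& b) const override {
        double s = 0.0;
        for (std::size_t i = 0; i < a.size(); ++i)
            s += a[i] * b[i];
        return s;
    }
};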