Heiko is worried that we will lose it :)
// Immutable features
// Linear model use cases:
//  - dot products: pairs of examples
//  - covariance matrix: there's a Features operator => CovarView => matrix
class Features {
    Features() {}
    ...
    // Stacks the transformer lazily and returns a copy of the features.
    Features transformed_by(Transformer t) const;
    // This evaluates the stacked transformers over the features
    // and returns a copy of the features.
    Features cached() const;
protected:
    void add_flag(int flag) {
        flags |= flag;
    }
    int get_flags();
};
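A minimal runnable sketch of the lazy stacking idea, assuming a Transformer can be modeled as a plain function over a matrix (the real interface would differ):

#include <functional>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical stand-ins for the real types.
using Matrix = std::vector<std::vector<double>>;
using Transformer = std::function<Matrix(const Matrix&)>;

class Features {
public:
    explicit Features(std::shared_ptr<const Matrix> data) : data(std::move(data)) {}

    // Immutable: stacks one more transformer and returns a copy;
    // nothing is evaluated yet.
    Features transformed_by(Transformer t) const {
        Features copy(*this);
        copy.pipeline.push_back(std::move(t));
        return copy;
    }

    // Evaluates the stacked transformers once and returns features
    // backed by the materialized result, with an empty pipeline.
    Features cached() const {
        Matrix result = *data;
        for (const auto& t : pipeline)
            result = t(result);
        return Features(std::make_shared<const Matrix>(std::move(result)));
    }

private:
    std::shared_ptr<const Matrix> data;
    std::vector<Transformer> pipeline;
};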
class DotFeatures : Features
{
    double inner(iterator, iterator) = 0;
    double inner(iterator, Vector) = 0;
    // Generic batched fallbacks in terms of the single-example versions:
    double inner(batch, Vector w) { for each in batch: collect(inner(each, w)) }
    double inner(batch l, batch r) { for each_l in l: for each_r in r: collect(inner(each_l, each_r)) }
};
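For illustration, the fallback batched inner products spelled out in plain C++, with std::vector standing in for iterators and batches (names here are not the real API):

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;

// Single-example dot product, the primitive everything falls back to.
double inner(const Vector& x, const Vector& w) {
    double s = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i)
        s += x[i] * w[i];
    return s;
}

// Batched fallback: loop over the batch and collect per-example results.
std::vector<double> inner(const std::vector<Vector>& batch, const Vector& w) {
    std::vector<double> out;
    out.reserve(batch.size());
    for (const auto& x : batch)
        out.push_back(inner(x, w));
    return out;
}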
class DenseFeatures : DotFeatures
{
    // These overrides are more efficient due to the dense storage:
    double inner(batch, Vector w) { linalg::do(matrix(batch) * w) }
    double inner(batch l, batch r) { linalg::do(matrix(l) * matrix(r)) }
    Matrix matrix(batch) {
        // return a submatrix if possible (batch is contiguous and no
        // preprocessors left), or create a new matrix
    }
    iterator begin() {
        // magic choosing between ranges and indices
    }
    vector<Range> ranges;
    vector<Index> indices;
    shared<Matrix> data;
};
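One possible reading of the begin() magic, as a sketch: prefer the explicit index list when one is set, otherwise walk the contiguous ranges. IndexIterator and its members are hypothetical names, and non-empty ranges are assumed:

#include <cstddef>
#include <vector>

struct Range { std::size_t begin, end; }; // half-open [begin, end)

// Yields example indices either from contiguous ranges or from an
// explicit index list, so callers never see which representation
// backs the subset.
class IndexIterator {
public:
    IndexIterator(const std::vector<Range>& ranges,
                  const std::vector<std::size_t>& indices)
        : ranges_(ranges), indices_(indices) {}

    bool has_more() const {
        return !indices_.empty() ? pos_ < indices_.size()
                                 : range_ < ranges_.size();
    }

    std::size_t next() {
        if (!indices_.empty())
            return indices_[pos_++];
        std::size_t value = ranges_[range_].begin + offset_;
        if (++offset_ == ranges_[range_].end - ranges_[range_].begin) {
            ++range_;
            offset_ = 0;
        }
        return value;
    }

private:
    const std::vector<Range>& ranges_;
    const std::vector<std::size_t>& indices_;
    std::size_t pos_ = 0, range_ = 0, offset_ = 0;
};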
class StreamingDotFeatures {
};
df = DataFrame();
f = Features(df).transformed_by(Mean).transformed_by(Normalize).cached();
class Covariance {
    Covariance(some<Features> f, Options options) { dot_f = as_dot_features(f); }
    Matrix full_matrix() const { covariance = zeros(); ... call dot_f.outer_into(covariance, each, other) or dot_f.batched_dot ... }
    Matrix diagonal() const { ... compute a few of the entries using the same procedure ... }
    some<DotFeatures> dot_f;
};
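A sketch of what full_matrix() boils down to, with plain vectors standing in for DotIterators and centered examples assumed: accumulate the outer products x * x^T and normalize:

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;
using Matrix = std::vector<Vector>;

// Illustrative only: the iterator-based API above would drive this
// accumulation via outer_into / batched_dot.
Matrix covariance_full_matrix(const std::vector<Vector>& examples) {
    const std::size_t d = examples.empty() ? 0 : examples.front().size();
    Matrix cov(d, Vector(d, 0.0));
    for (const auto& x : examples)
        for (std::size_t i = 0; i < d; ++i)
            for (std::size_t j = 0; j < d; ++j)
                cov[i][j] += x[i] * x[j];
    // Normalize by the number of examples (features assumed centered).
    for (auto& row : cov)
        for (auto& v : row)
            v /= static_cast<double>(examples.size());
    return cov;
}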
class DotIterator {
    double inner(DotIterator other) const {
        return dot_f->inner(current, other.current);
    }
    double inner(Vector other) const;
    void add(Vector other, double alpha);
    void auto_outer_into(Matrix outers) const;
    void outer_into(DotIterator other, Matrix outer) const;
    DotFeatures dot_f;
    iterator current;
};
class BatchedDotIterator {
    BatchedDotIterator(DotIterator);
    // same interface as above, but batched
};
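The batching idea behind BatchedDotIterator, sketched over plain index lists (all names hypothetical): hand out fixed-size chunks so DenseFeatures can serve each chunk with one matrix product:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Wraps a flat sequence of example indices and yields them in
// fixed-size batches.
class BatchedIndexIterator {
public:
    BatchedIndexIterator(std::vector<std::size_t> indices, std::size_t batch_size)
        : indices_(std::move(indices)), batch_size_(batch_size) {}

    bool has_more() const { return pos_ < indices_.size(); }

    std::vector<std::size_t> next_batch() {
        std::size_t end = std::min(pos_ + batch_size_, indices_.size());
        std::vector<std::size_t> batch(indices_.begin() + pos_, indices_.begin() + end);
        pos_ = end;
        return batch;
    }

private:
    std::vector<std::size_t> indices_;
    std::size_t batch_size_;
    std::size_t pos_ = 0;
};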
SGDRegressor {
    // NO STATE BUT W
    SGDRegressor()
    train(Features f) {
        // .. check centered ..
        // warm-start?
        auto w = initial_w(f);
        auto iterator = BatchedDotIterator(f);
        auto alpha = get(ALPHA);
        while (not stopped and iterator.has_more()) {
            iterator.add(w, alpha);
            iterator++;
        }
        set(W, w);
    }
}
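For reference, the kind of update iterator.add(w, alpha) stands for, written out as a runnable least-squares SGD loop over raw vectors (an illustration only, not the iterator-based API above):

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;

// Stateless apart from w: per example, w += alpha * (y_i - <w, x_i>) * x_i.
Vector sgd_train(const std::vector<Vector>& X, const Vector& y,
                 double alpha, std::size_t epochs) {
    Vector w(X.empty() ? 0 : X.front().size(), 0.0);
    for (std::size_t e = 0; e < epochs; ++e) {
        for (std::size_t i = 0; i < X.size(); ++i) {
            double pred = 0.0;
            for (std::size_t j = 0; j < w.size(); ++j)
                pred += w[j] * X[i][j];
            double err = y[i] - pred;
            for (std::size_t j = 0; j < w.size(); ++j)
                w[j] += alpha * err * X[i][j];
        }
    }
    return w;
}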
sgd = SGDRegressor();
f = DenseFeatures(pd.read_csv('concrete.csv'));
sgd.set("warm_start", Vector::RandomLaplace(f.n_features()));
splitting = BinarySplitSubset(f);
train, test = splitting.first(), splitting.second();
sgd.train(train);
sgd.apply(test);
f = StreamingFeatures(HdfsStream("hdfs://my_porno_data"));
sgd.train(HeadSubset(f, 1e6));
LinearModel {
    // NO STATE BUT W
    LinearModel()
    Vector initial_w(Features f);
    train(Features f) {
        // assert the features are centered
        assert(f.get_flags() & CENTERED);
        // we do not expose the type of matrix/vector at the model level;
        // linalg should find it out, as we have opaque Matrix/Vector
        // classes which are type agnostic.
        Matrix covariance = Covariance(f).full_matrix();
        linalg::add_diag(covariance, get(LAMBDA));
        set(W, linalg::cholesky_solve(covariance, y));
    }
    Tag<Vector> LAMBDA;
    Tag<Vector> W;
}
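A sketch of what linalg::cholesky_solve could do underneath for the ridge system (Cov + lambda * I) w = b, assuming a plain double-based Matrix (a real backend would dispatch to an optimized, type-aware routine):

#include <cmath>
#include <cstddef>
#include <vector>

using Vector = std::vector<double>;
using Matrix = std::vector<Vector>;

// Factor A = L L^T in place, then do two triangular solves.
Vector cholesky_solve(Matrix A, Vector b) {
    const std::size_t n = A.size();
    // In-place Cholesky: the lower triangle of A becomes L.
    for (std::size_t j = 0; j < n; ++j) {
        for (std::size_t k = 0; k < j; ++k)
            A[j][j] -= A[j][k] * A[j][k];
        A[j][j] = std::sqrt(A[j][j]);
        for (std::size_t i = j + 1; i < n; ++i) {
            for (std::size_t k = 0; k < j; ++k)
                A[i][j] -= A[i][k] * A[j][k];
            A[i][j] /= A[j][j];
        }
    }
    // Forward solve L z = b (z overwrites b).
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t k = 0; k < i; ++k)
            b[i] -= A[i][k] * b[k];
        b[i] /= A[i][i];
    }
    // Backward solve L^T w = z (w overwrites b).
    for (std::size_t i = n; i-- > 0;) {
        for (std::size_t k = i + 1; k < n; ++k)
            b[i] -= A[k][i] * b[k];
        b[i] /= A[i][i];
    }
    return b;
}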
Batch optimisation is taken care of automagically:
model.train(SubsetView(features))
Kernel use cases (a hedged interface sketch follows the list):
1) Kernel ridge regression (one needs to compute the whole kernel matrix)
2) Kernel SVM (compute the kernel for a pair, and on batches to apply)
3) Streaming MMD (compute the kernel for a pair from two different streams)
4) Combined kernel (all of the above)
*) Kernel matrix precomputation
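A sketch of a kernel interface that would cover all of the cases above: single pairs for SVMs and streaming MMD, a full or precomputed matrix for kernel ridge regression. Names and signatures are illustrative, not a real library API:

#include <cstddef>
#include <vector>

using Vector = std::vector<double>;

struct Kernel {
    virtual ~Kernel() = default;

    // Use cases 2 and 3: one pair, possibly from two different sources.
    virtual double compute(const Vector& a, const Vector& b) const = 0;

    // Use cases 1 and *): the full (or precomputed) kernel matrix,
    // built on top of the pairwise primitive.
    std::vector<Vector> full_matrix(const std::vector<Vector>& lhs,
                                    const std::vector<Vector>& rhs) const {
        std::vector<Vector> K(lhs.size(), Vector(rhs.size()));
        for (std::size_t i = 0; i < lhs.size(); ++i)
            for (std::size_t j = 0; j < rhs.size(); ++j)
                K[i][j] = compute(lhs[i], rhs[j]);
        return K;
    }
};

// The simplest concrete instance: a linear kernel.
struct LinearKernel : Kernel {
    double compute(const Vector& a, const Vector& b) const override {
        double s = 0.0;
        for (std::size_t i = 0; i < a.size(); ++i)
            s += a[i] * b[i];
        return s;
    }
};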