Skip to content

Instantly share code, notes, and snippets.

@CanoeFZH
Created March 29, 2018 09:17
Show Gist options
  • Save CanoeFZH/2a062aa593ae5ce45f2088ab2ceebaaa to your computer and use it in GitHub Desktop.
Save CanoeFZH/2a062aa593ae5ce45f2088ab2ceebaaa to your computer and use it in GitHub Desktop.
fm
model:
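w0 (bias term): line 0; 1 column
w (linear weights, one feature per line): lines 1 to 20000187; 1 column each
v (factor weights, one feature per line): lines 20000188 to end; 4 columns each (one per factor)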
num_factor: 4
user_feature :
feature,
entity of mimir_user_status
article_feature:
category: one_hot
related_image_count: >=3 logic
// Reads the BSON list stored under `field` in the record (x._2) and renders
// every element through `pattern`, which carries a per-field prefix.
def prefixedValues(field: String, pattern: String): Array[String] =
  x._2.get(field).asInstanceOf[BasicBSONList].toArray().map(item => pattern.format(item))

// Each source field gets its own prefix, so the same word coming from two
// different fields produces two different tag strings.
val titleKeyWords = prefixedValues("title_keywords", "tk_%s")
val textKeyWords = prefixedValues("text_keywords", "tek_%s")
val contentEntities = prefixedValues("content_name_entities", "ce_%s")

// All prefixed tags are hashed together into a single indicator vector.
GetTagFeature(titleKeyWords ++ textKeyWords ++ contentEntities)
value: user_feature + article_feature
/**
 * Computes the factorization-machine score for one sparse input row.
 *
 * The score is the sum of up to three parts:
 *   - the bias w0, added only when k0 is set;
 *   - the linear terms w(id) * value over all row entries, added only when
 *     k1 is set;
 *   - for each of the num_factor factors f, 0.5 * (sum(f)^2 - sum_sqr(f)),
 *     where both accumulators run over v(f, id) * value of the row entries.
 *
 * @param x        sparse row; entries are read from x.data[0 .. x.size)
 * @param sum      output buffer; sum(f) is overwritten with the per-factor sum
 * @param sum_sqr  output buffer; sum_sqr(f) is overwritten with the per-factor
 *                 sum of squares
 * @return the accumulated score
 */
double fm_model::predict(sparse_row<FM_FLOAT>& x, DVector<double> &sum, DVector<double> &sum_sqr) {
    double score = 0;

    // Bias part.
    if (k0) {
        score += w0;
    }

    // Linear part: one weight per attribute id present in the row.
    if (k1) {
        for (uint pos = 0; pos < x.size; ++pos) {
            assert(x.data[pos].id < num_attribute);
            score += w(x.data[pos].id) * x.data[pos].value;
        }
    }

    // Factor part: sum and sum_sqr are reset and refilled for every factor,
    // so callers can read the per-factor accumulators afterwards.
    for (int factor = 0; factor < num_factor; ++factor) {
        sum(factor) = 0;
        sum_sqr(factor) = 0;
        for (uint pos = 0; pos < x.size; ++pos) {
            double term = v(factor, x.data[pos].id) * x.data[pos].value;
            sum(factor) += term;
            sum_sqr(factor) += term*term;
        }
        score += 0.5 * (sum(factor)*sum(factor) - sum_sqr(factor));
    }

    return score;
}
// Number of hash buckets; used as the length of the sparse feature vectors
// and as the modulus passed to SDBMHash.
val HASH_SIZE: Int = 1000000
// Bucket count reserved for id features; not referenced by the code visible here.
val ID_HASH_SIZE: Int = 1000000
/**
 * Hashes a string into a bucket index.
 *
 * @param s        string to hash
 * @param hashSize number of buckets; for a positive value the result lies in
 *                 [0, hashSize)
 * @return the bucket index for `s`
 */
def SDBMHash(s: String, hashSize: Int): Int = {
  // Every character updates the running value as ch + acc * 65599, written
  // with shifts (64 + 65536 - 1); Long arithmetic wraps silently on overflow.
  val rawHash = s.foldLeft(0L) { (acc, ch) =>
    (acc << 16) + (acc << 6) - acc + ch.toInt
  }
  // The remainder of a negative Long is negative, so shift it up by one
  // modulus and reduce again to land in the non-negative range.
  val remainder = rawHash % hashSize
  val bucket = (remainder + hashSize) % hashSize
  bucket.toInt
}
/**
 * Converts a serialized `{"name":weight,...}` map into a hashed sparse vector.
 *
 * Braces and double quotes are stripped, the remainder is split on `,` into
 * `name:weight` pairs, and each weight is stored at the bucket
 * `SDBMHash(name, HASH_SIZE)`. When two names fall into the same bucket the
 * later pair overwrites the earlier one.
 *
 * A null input and an empty map (`"{}"`) both yield an all-zero vector.
 *
 * @param entity serialized entity map; may be null
 * @return a vector of length HASH_SIZE holding the entity weights
 * @throws ArrayIndexOutOfBoundsException if a non-empty pair has no `:weight` part
 * @throws NumberFormatException if a weight is not a valid number
 */
def EntityToVector(entity: String): SparseVector[Double] = {
  val scoreFeature = SparseVector.zeros[Double](HASH_SIZE)
  Option(entity).foreach { raw =>
    raw.replace("{", "").replace("}", "").replace("\"", "")
      .split(",")
      // "".split(",") yields Array(""), so an empty map would otherwise reach
      // entityWeight(1) below and crash; skip empty tokens instead.
      .filter(_.nonEmpty)
      .foreach { pair =>
        val entityWeight = pair.split(":")
        // The weight is deliberately still parsed as Float (then widened to
        // Double) so stored values match what the original code produced.
        scoreFeature(SDBMHash(entityWeight(0), HASH_SIZE)) = entityWeight(1).toFloat
      }
  }
  scoreFeature
}
/**
 * Builds a hashed indicator vector for a set of tags.
 *
 * Every tag sets the entry at bucket `SDBMHash(tag, HASH_SIZE)` to 1.0;
 * repeated tags and tags that collide on a bucket leave that entry at 1.0.
 *
 * @param tags tag strings to encode
 * @return a vector of length HASH_SIZE with 1.0 at each tag's bucket
 */
def GetTagFeature(tags: Array[String]): SparseVector[Double] = {
  val indicator = SparseVector.zeros[Double](HASH_SIZE)
  for (tag <- tags) {
    val bucket = SDBMHash(tag, HASH_SIZE)
    indicator(bucket) = 1D
  }
  indicator
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment