-
-
Save CanoeFZH/2a062aa593ae5ce45f2088ab2ceebaaa to your computer and use it in GitHub Desktop.
fm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
model:
  w0: line 0; 1 column
  w: line 1 to 20000187; 1 column
  v: line 20000188 to end; 4 columns
  num_factor: 4
user_feature:
  feature,
  entity of mimir_user_status
article_feature:
  category: one_hot
  related_image_count: >=3 logic
val titleKeyWords = x._2.get("title_keywords").asInstanceOf[BasicBSONList].toArray().map(keyword => "tk_%s".format(keyword)) | |
val textKeyWords = x._2.get("text_keywords").asInstanceOf[BasicBSONList].toArray().map(keyword => "tek_%s".format(keyword)) | |
val contentEntities = x._2.get("content_name_entities").asInstanceOf[BasicBSONList].toArray().map(entity => "ce_%s".format(entity)) | |
GetTagFeature(titleKeyWords ++ textKeyWords ++ contentEntities) | |
value: user_feature + article_feature | |
// Computes the factorization-machine score for one sparse input row.
//
// The score is built from three parts:
//   * w0, added when k0 is set;
//   * the weighted sum w(id) * value over the row's entries, added when k1 is set;
//   * for every factor f, 0.5 * (S_f^2 - Q_f), where S_f is the sum and Q_f the
//     sum of squares of v(f, id) * value over the row's entries.
//
// x        the sparse row to score; only x.size, x.data[i].id and
//          x.data[i].value are read.
// sum      overwritten at indices [0, num_factor) with S_f.
// sum_sqr  overwritten at indices [0, num_factor) with Q_f.
//
// Returns the accumulated score.
double fm_model::predict(sparse_row<FM_FLOAT>& x, DVector<double> &sum, DVector<double> &sum_sqr) {
	double prediction = 0;

	// Constant term.
	if (k0) {
		prediction += w0;
	}

	// Per-attribute (linear) term.
	if (k1) {
		uint idx = 0;
		while (idx < x.size) {
			assert(x.data[idx].id < num_attribute);
			prediction += w(x.data[idx].id) * x.data[idx].value;
			++idx;
		}
	}

	// Factorized term. The accumulators live in the caller-provided vectors so
	// the per-factor sums remain available after the call.
	for (int factor = 0; factor < num_factor; ++factor) {
		sum(factor) = 0;
		sum_sqr(factor) = 0;
		for (uint idx = 0; idx < x.size; ++idx) {
			const double term = v(factor, x.data[idx].id) * x.data[idx].value;
			sum(factor) += term;
			sum_sqr(factor) += term * term;
		}
		prediction += 0.5 * (sum(factor) * sum(factor) - sum_sqr(factor));
	}

	return prediction;
}
/** Number of hash buckets, i.e. the length of the sparse vectors built by the feature helpers. */
val HASH_SIZE: Int = 1000000

/** Bucket count for hashed ids. NOTE(review): not referenced in the visible part of this file - confirm usage. */
val ID_HASH_SIZE: Int = 1000000
/**
 * Hashes a string to a bucket index using SDBM-style mixing on a 64-bit state.
 *
 * @param s        the string to hash
 * @param hashSize the number of buckets
 * @return the bucket index; for a positive `hashSize` it lies in `[0, hashSize)`
 */
def SDBMHash(s: String, hashSize: Int): Int = {
  // One mixing round: fold the next character into the running state.
  def mix(state: Long, ch: Char): Long =
    ch.toInt + (state << 6) + (state << 16) - state

  val raw = s.foldLeft(0L)(mix)
  // `raw` can be negative after Long wrap-around; the second modulo folds the
  // (possibly negative) remainder back into the non-negative range.
  val bucket = ((raw % hashSize) + hashSize) % hashSize
  bucket.toInt
}
/**
 * Converts a serialized `name:weight` list into a hashed sparse feature vector.
 *
 * All `{`, `}` and `"` characters are stripped from the input, the remainder is
 * split on `,` into pairs and each pair on `:` into a name and a weight. The
 * weight is stored at index `SDBMHash(name, HASH_SIZE)`; when two names hash to
 * the same index the later pair overwrites the earlier one.
 *
 * Pairs without a weight part (for example the single empty pair produced by
 * `""` or `"{}"`) are skipped instead of throwing
 * `ArrayIndexOutOfBoundsException`.
 *
 * @param entity the serialized pairs, or `null`
 * @return a vector of length `HASH_SIZE`; all zeros when `entity` is `null` or
 *         holds no usable pair
 * @throws NumberFormatException if a weight is not a parseable number
 */
def EntityToVector(entity: String): SparseVector[Double] = {
  val scoreFeature = SparseVector.zeros[Double](HASH_SIZE)
  if (entity != null) {
    entity.replace("{", "").replace("}", "").replace("\"", "")
      .split(",")
      .foreach { pair =>
        pair.split(":") match {
          // Any parts after the second one are ignored, as before.
          case Array(name, weight, _*) =>
            // Parsed as Float on purpose: keeps the stored values identical to
            // what this function has always produced.
            scoreFeature(SDBMHash(name, HASH_SIZE)) = weight.toFloat
          case _ =>
            // No weight present (empty input or malformed pair): nothing to store.
        }
      }
  }
  scoreFeature
}
/**
 * Builds a hashed indicator vector from a list of tags.
 *
 * Every tag sets the entry at index `SDBMHash(tag, HASH_SIZE)` to `1.0`; tags
 * that hash to the same index share one entry.
 *
 * @param tags the tags to encode
 * @return a vector of length `HASH_SIZE` with `1.0` at each hashed tag index
 */
def GetTagFeature(tags: Array[String]): SparseVector[Double] = {
  val scoreFeature = SparseVector.zeros[Double](HASH_SIZE)
  // foreach rather than map: the writes are side effects and there is no
  // result worth collecting.
  tags.foreach { tag =>
    scoreFeature(SDBMHash(tag, HASH_SIZE)) = 1D
  }
  scoreFeature
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment