Skip to content

Instantly share code, notes, and snippets.

@abfo
Created September 22, 2018 23:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abfo/92323644e642a0555af7527f7c7c17ca to your computer and use it in GitHub Desktop.
Save abfo/92323644e642a0555af7527f7c7c17ca to your computer and use it in GitHub Desktop.
// Resolve the on-disk location of the binary word2vec model under App_Data,
// then read it into the shared vocabulary.
string modelPath = HostingEnvironment.MapPath("~/App_Data/GoogleNews-vectors-negative300-SLIM.bin");
Word2VecBinaryReader modelReader = new Word2VecBinaryReader();
_vocabulary = modelReader.Read(modelPath);
// ... collect all words in post (words is List<string>, not included as implementation specific) ...

// Build one vector for the post: the element-wise average of the word2vec
// vectors of every word in `words` that the vocabulary contains.
// If no word is in the vocabulary the result is the zero vector (all 0.0),
// never NaN.
int dimensions = _vocabulary.VectorDimensionsCount;
double[] vector = new double[dimensions];

// Sum the vectors of all words that exist in the vocabulary.
int inVocabularyCount = 0;
foreach (string word in words)
{
    if (word == null)
    {
        // A null entry cannot be a vocabulary word; skip it instead of
        // relying on an exception from the lookup.
        continue;
    }

    float[] wordVector;
    try
    {
        wordVector = _vocabulary.GetRepresentationFor(word).NumericVector;
    }
    catch (System.Collections.Generic.KeyNotFoundException)
    {
        // Word is not in the vocabulary: ignore it.
        // NOTE(review): assumes GetRepresentationFor is a dictionary lookup that
        // throws KeyNotFoundException for unknown words - confirm against the
        // word2vec library version in use. Other exception types are no longer
        // swallowed so that genuine bugs surface.
        continue;
    }

    for (int i = 0; i < dimensions; i++)
    {
        vector[i] += wordVector[i];
    }
    inVocabularyCount++;
}

// Return the average of the vectors. Guard the division: with zero matched
// words, 0.0 / 0 would turn every component into NaN and poison any later
// distance computation.
if (inVocabularyCount > 0)
{
    for (int i = 0; i < dimensions; i++)
    {
        vector[i] /= inVocabularyCount;
    }
}
return vector;
// ... once all posts have a vector we just find the closest ones: compare this post to each of the others and sort the list by distance ...
// d is the value Distance.Cosine returns for this post's vector and the other post's vector.
// NOTE(review): presumably a smaller d means the posts are more similar - confirm against the Distance.Cosine implementation in use.
// NOTE(review): if either vector is all zeros or contains NaN the result may be NaN - verify how the sort should treat such posts.
double d = Distance.Cosine(vectorThisPost, vectorOtherPost);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment