Skip to content

Instantly share code, notes, and snippets.

@zgramana
Forked from mathias-brandewinder/word2vec.fsx
Created September 22, 2016 22:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zgramana/1dd6dcb777d671adf8b64d7f6d0c38ad to your computer and use it in GitHub Desktop.
Save zgramana/1dd6dcb777d671adf8b64d7f6d0c38ad to your computer and use it in GitHub Desktop.
Word2Vec experiment
#I "../packages/"
#r @"FSharp.Data/lib/net40/FSharp.Data.dll"
#r @"StemmersNet/lib/net20/StemmersNet.dll"
#r @"FSharp.Collections.ParallelSeq/lib/net40/FSharp.Collections.ParallelSeq.dll"
#load "Utilities.fs"
open FSharp.Data
// Relative locations of the data files used by this script.
// They are declared as [<Literal>] so they are compile-time constants;
// trainPath and testPath are passed as static arguments to CsvProvider below.
// Every path uses forward slashes: .NET accepts them on Windows, whereas a
// backslash-separated relative path does not resolve on Unix-like systems.

/// Training set CSV.
[<Literal>]
let trainPath = @"../data/train.csv"

/// Test set CSV.
[<Literal>]
let testPath = @"../data/test.csv"

/// Product attributes CSV (not referenced in the visible part of the script).
[<Literal>]
let attributesPath = @"../data/attributes.csv"

/// Product descriptions CSV (not referenced in the visible part of the script).
/// Previously written with backslashes, unlike every other path here.
[<Literal>]
let productsPath = @"../data/product_descriptions.csv"

/// Data directory; presumably the folder submissions are written to — confirm against callers.
[<Literal>]
let submissionPath = @"../data/"
/// Typed view of the training CSV, with the schema inferred from the file at `trainPath`.
/// The `Schema` override leaves the first four columns as inferred and forces the
/// fifth column to be typed as `float`.
type Train = CsvProvider<trainPath,Schema=",,,,float">
/// Typed view of the test CSV, with every column type inferred from the file at `testPath`.
type Test = CsvProvider<testPath>
/// Lazy sequence of product titles: the `Product_title` of every row of the
/// training sample file, followed by the `Product_title` of every row of the
/// test sample file. Nothing is read until the sequence is enumerated, and the
/// sample files are re-opened on each enumeration.
let sample =
    seq {
        for trainRow in Train.GetSample().Rows do
            yield trainRow.Product_title

        for testRow in Test.GetSample().Rows do
            yield testRow.Product_title
    }
#load "Utilities.fs"
open HomeDepot.Utilities
/// Distinct product titles, each run through `preprocess`, materialised as an array.
/// De-duplication happens on the raw titles, before preprocessing, so two
/// different raw titles that preprocess to the same text both remain.
let titles =
    [| for rawTitle in Seq.distinct sample -> preprocess rawTitle |]
// Text file that receives the preprocessed titles, one per line; it is later
// handed to the Word2Vec builder as its training file.
// NOTE(review): absolute path tied to one user's machine — adjust before running elsewhere.
let path = @"C:\users\mathias brandewinder\desktop\titles.txt"

// WriteAllLines returns unit, so `file` holds no value; the binding only
// triggers the write (creating or overwriting the file at `path`).
let file = System.IO.File.WriteAllLines(path = path, contents = titles)
#r @"C:\Users\Mathias Brandewinder\Documents\GitHub\Word2Vec.Net\Word2Vec.Net\bin\Release\Word2Vec.Net.dll"
open Word2Vec
// Files handed to the Word2Vec builder: `output` as its output file and
// `vocab` as its saved-vocabulary file.
// NOTE(review): absolute paths tied to one user's machine.
let output = @"C:\users\mathias brandewinder\desktop\output.txt"
let vocab = @"C:\users\mathias brandewinder\desktop\vocab.txt"

/// The object returned by `Build()` after configuring a Word2VecBuilder with
/// the titles file as training input. Despite its name, this is the built
/// result, not the fluent builder itself.
let builder =
    let configuration =
        Word2Vec.Net.Word2VecBuilder
            .Create()
            .WithTrainFile(path)
            .WithOutputFile(output)
            // presumably 1 = write the output in binary format — confirm against the library
            .WithBinary(1)
            // .WithCBow(1)
            // presumably the vector dimension — confirm against the library
            .WithSize(50)
            // "Vocub" (sic): method name exactly as called in the original script
            .WithSaveVocubFile(vocab)
            // presumably the context window width — confirm against the library
            .WithWindow(5)

    configuration.Build()

// Runs training; expected to produce the files at `output` and `vocab`.
builder.TrainModel()
// Query object constructed over the trained model file at `output`.
// The bare Search call is a top-level expression: under F# Interactive its
// result is echoed as `it`; it is not bound to a name.
let distance = new Word2Vec.Net.Distance(output)
distance.Search("shower")

// Analogy query object over the same model file.
// NOTE(review): word-analogy tools in the word2vec family usually expect three
// words (A B C); this query supplies two — confirm what the library returns here.
let analogy = new Word2Vec.Net.WordAnalogy(output)
analogy.Search("metal stakes")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment