Skip to content

Instantly share code, notes, and snippets.

@samueleresca
Last active April 23, 2019 06:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samueleresca/74bf581877a5971ad5aded7a4a2e4bc1 to your computer and use it in GitHub Desktop.
Save samueleresca/74bf581877a5971ad5aded7a4a2e4bc1 to your computer and use it in GitHub Desktop.
open Microsoft.ML
open Microsoft.ML
open Microsoft.ML.Data
open Microsoft.ML.Transforms.Text
/// Custom stop-word tokens removed from the tokenized lyrics by the
/// `RemoveStopWords` stage in `tokenizeLyrics`.
///
/// The list mixes common English function words, contraction fragments
/// ("'re", "'ll", "'d", "ve", ...), punctuation-like tokens ("...", "''",
/// "``", "--") and a handful of short non-English words ("el", "la", "que",
/// "y", "de", "en"). "s" and "t" each appear twice in the list.
// NOTE(review): "ä±" looks like a mis-encoded character — confirm against the
// lyrics data before changing it.
let stopwords =
    [|
        "ourselves"; "hers"; "between"; "yourself"; "but"; "again"; "there"; "about"; "once"; "during";
        "out"; "very"; "having"; "with"; "they"; "own"; "an"; "be"; "some"; "for";
        "do"; "its"; "yours"; "such"; "into"; "of"; "most"; "itself"; "other"; "off";
        "is"; "s"; "am"; "or"; "who"; "as"; "from"; "him"; "each"; "the";
        "themselves"; "until"; "below"; "are"; "we"; "these"; "your"; "his"; "through"; "don";
        "nor"; "me"; "were"; "her"; "more"; "himself"; "this"; "down"; "should"; "our";
        "their"; "while"; "above"; "both"; "up"; "to"; "ours"; "had"; "she"; "all";
        "no"; "when"; "at"; "any"; "before"; "them"; "same"; "and"; "been"; "have";
        "in"; "will"; "on"; "does"; "yourselves"; "then"; "that"; "because"; "what"; "over";
        "why"; "so"; "can"; "did"; "not"; "now"; "under"; "he"; "you"; "herself";
        "has"; "just"; "where"; "too"; "only"; "myself"; "which"; "those"; "i"; "after";
        "few"; "whom"; "t"; "ll"; "being"; "if"; "theirs"; "my"; "against"; "a";
        "by"; "doing"; "it"; "how"; "further"; "was"; "here"; "than"; "s"; "t";
        "m"; "'re"; "'ll"; "ve"; "..."; "ä±"; "''"; "``"; "--"; "'d";
        "el"; "la"; "que"; "y"; "de"; "en"
    |]
/// Separator characters handed to the `TokenizeWords` stage in
/// `tokenizeLyrics`: apostrophe, space and comma.
let symbols : char[] = [| '\''; ' '; ',' |]
/// Plots a line chart of the most frequent words in `words`.
///
/// Occurrences of each distinct word are counted, the (word, count) pairs are
/// ordered by descending count, at most the first 15 pairs are kept, and the
/// result is passed to `Chart.Line`.
///
/// words: the word occurrences to count; may contain fewer than 15 distinct
/// words (or none), in which case every available pair is charted.
let renderLineChartForWords(words: seq<string>) =
    words
    |> Seq.countBy id
    |> Seq.sortByDescending snd
    // Seq.truncate instead of Seq.take: Seq.take raises InvalidOperationException
    // when the sequence holds fewer than 15 items, Seq.truncate just returns
    // what is there.
    |> Seq.truncate 15
    |> Chart.Line
/// Splits lyrics into word tokens and strips stop words using an ML.NET text
/// pipeline, returning every remaining token from every row as a single list.
///
/// lyrics: the input rows; the pipeline reads their "Lyrics" column.
///
/// The MLContext is created with a fixed seed of 0. The chain is fitted on the
/// same data it then transforms.
let tokenizeLyrics (lyrics: seq<LyricsInput>) =
    let mlContext = MLContext(seed = Nullable 0)
    let lyricsView = mlContext.Data.LoadFromEnumerable lyrics
    let text = mlContext.Transforms.Text

    // Stages, in order: featurize -> normalize -> tokenize on `symbols`
    // -> drop custom `stopwords` -> drop the default stop words.
    // NOTE(review): the "FeaturizedLyrics" output is not read anywhere in this
    // function; the stage may only be here to start the chain — confirm before
    // removing it.
    let estimator =
        text.FeaturizeText("FeaturizedLyrics", "Lyrics")
            .Append(text.NormalizeText("NormalizedLyrics", "Lyrics"))
            .Append(text.TokenizeWords("TokenizedLyric", "NormalizedLyrics", symbols))
            .Append(text.RemoveStopWords("LyricsWithNoCustomStopWords", "TokenizedLyric", stopwords))
            .Append(text.RemoveDefaultStopWords("LyricsWithNoStopWords", "LyricsWithNoCustomStopWords"))

    let fitted = estimator.Fit(lyricsView)
    let transformed = fitted.Transform(lyricsView)

    // One string[] of surviving tokens per input row; flatten them into one list.
    let tokensPerRow = transformed.GetColumn<string[]>(mlContext, "LyricsWithNoStopWords")
    List.ofSeq (Seq.concat tokensPerRow)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment