// Source: GitHub gist kevmal/07c0a27ef83282e799cf4e3962311594 by @kevmal, created May 9, 2018 19:49.
#r @"..\packages\NETStandard.Library.NETFramework.2.0.0-preview2-25405-01\build\net461\lib\netstandard.dll"
#r @"..\packages\Google.Protobuf.3.5.1\lib\net45\Google.Protobuf.dll" //Google.Protobuf
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.dll" //Microsoft.ML
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Api.dll" //Microsoft.ML.Api
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Core.dll" //Microsoft.ML.Core
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.CpuMath.dll" //Microsoft.ML.CpuMath
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Data.dll" //Microsoft.ML.Data
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.FastTree.dll" //Microsoft.ML.FastTree
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.InternalStreams.dll" //Microsoft.ML.InternalStreams
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.KMeansClustering.dll" //Microsoft.ML.KMeansClustering
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Maml.dll" //Microsoft.ML.Maml
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.PCA.dll" //Microsoft.ML.PCA
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.PipelineInference.dll" //Microsoft.ML.PipelineInference
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.ResultProcessor.dll" //Microsoft.ML.ResultProcessor
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.StandardLearners.dll" //Microsoft.ML.StandardLearners
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Sweeper.dll" //Microsoft.ML.Sweeper
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Transforms.dll" //Microsoft.ML.Transforms
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.UniversalModelFormat.dll" //Microsoft.ML.UniversalModelFormat
#r @"..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll" //Newtonsoft.Json
#r @"..\packages\NuGet.Client.4.0.0\lib\net45\NuGet.Client.dll" //NuGet.Client
#r @"..\packages\NuGet.Common.4.0.0\lib\net45\NuGet.Common.dll" //NuGet.Common
#r @"..\packages\NuGet.ContentModel.4.0.0\lib\net45\NuGet.ContentModel.dll" //NuGet.ContentModel
#r @"..\packages\NuGet.Frameworks.4.0.0\lib\net45\NuGet.Frameworks.dll" //NuGet.Frameworks
#r @"..\packages\NuGet.Packaging.4.0.0\lib\net45\NuGet.Packaging.dll" //NuGet.Packaging
#r @"..\packages\NuGet.Packaging.Core.4.0.0\lib\net45\NuGet.Packaging.Core.dll" //NuGet.Packaging.Core
#r @"..\packages\NuGet.Packaging.Core.Types.4.0.0\lib\net45\NuGet.Packaging.Core.Types.dll" //NuGet.Packaging.Core.Types
#r @"..\packages\NuGet.Repositories.4.0.0\lib\net45\NuGet.Repositories.dll" //NuGet.Repositories
#r @"..\packages\NuGet.RuntimeModel.4.0.0\lib\net45\NuGet.RuntimeModel.dll" //NuGet.RuntimeModel
#r @"..\packages\NuGet.Versioning.4.0.0\lib\net45\NuGet.Versioning.dll" //NuGet.Versioning
#r @"..\packages\System.CodeDom.4.4.0\lib\net461\System.CodeDom.dll" //System.CodeDom
#r @"..\packages\System.Threading.Tasks.Dataflow.4.8.0\lib\netstandard2.0\System.Threading.Tasks.Dataflow.dll" //System.Threading.Tasks.Dataflow
#r @"..\packages\System.ValueTuple.4.4.0\lib\net47\System.ValueTuple.dll" //System.ValueTuple
open Microsoft.ML.Models
open Microsoft.ML.Runtime
open Microsoft.ML.Runtime.Api
open Microsoft.ML.Trainers
open Microsoft.ML.Transforms
open System.Net
open System.IO
open Microsoft.ML
open System
// CpuMathNative location.
// Directory that is prepended to PATH below so the native CpuMathNative library
// can be found. The folder next to this script (the same "..\packages" root the
// #r directives resolve against) is preferred so the script is not tied to a
// single machine layout; the original hard-coded location is kept as a fallback.
let libDir =
    let besideScript =
        Path.GetFullPath(
            Path.Combine(__SOURCE_DIRECTORY__, @"..\packages\Microsoft.ML.0.1.0\runtimes\win-x64\native"))
    if Directory.Exists besideScript then besideScript
    else @"C:\packages\Microsoft.ML.0.1.0\runtimes\win-x64\native"

// Prepend libDir to the process PATH. When PATH is unset or empty, use libDir
// alone rather than leaving a trailing separator (an empty PATH entry) behind.
Environment.SetEnvironmentVariable(
    "PATH",
    (match Environment.GetEnvironmentVariable("PATH") with
     | null | "" -> libDir
     | existing -> libDir + string Path.PathSeparator + existing))
/// Makes sure the data file `name` is available locally and returns `name`.
/// When no file exists at `name`, it is downloaded from the test/data folder of
/// the dotnet/machinelearning repository (master branch) and saved under `name`,
/// with progress messages printed to standard output.
let file name =
    if not (File.Exists(name)) then
        let url = sprintf "https://raw.githubusercontent.com/dotnet/machinelearning/master/test/data/%s" name
        printfn "%s not found." name
        printfn "Downloading... %s" url
        // The client is disposed as soon as this branch completes.
        use client = new WebClient()
        client.DownloadFile(url, name)
        printfn "Done"
    name
/// One input row of the sentiment data set, declared with explicit mutable
/// fields carrying Column attributes that describe the source column of each.
type SentimentData =
    /// Bound to the column at ordinal "0" and exposed under the name "Label".
    [<Column(ordinal = "0", name = "Label")>]
    val mutable Sentiment : double
    /// Bound to the column at ordinal "1"; holds the text to classify.
    [<Column(ordinal = "1")>]
    val mutable SentimentText : string
    /// Creates a row with Sentiment 0.0 and an empty text.
    new() = {Sentiment = 0.0; SentimentText = ""}
    /// Creates a row with Sentiment 0.0 for the given text (used to build prediction inputs).
    new(txt) = {Sentiment = 0.0; SentimentText = txt}
/// Output row produced by the trained model for one SentimentData input.
type SentimentPrediction =
    /// Bound to the model output column named "PredictedLabel".
    [<ColumnName("PredictedLabel")>]
    val mutable Sentiment : bool
    /// Creates a prediction with Sentiment initialised to false.
    new() = {Sentiment = false}
// Training and test data sets; each is fetched by `file` if it is not already
// present under that name.
let sentimentDataPath = "wikipedia-detox-250-line-data.tsv" |> file
let sentimentTestPath = "wikipedia-detox-250-line-test.tsv" |> file
// Absolute path of the training file, as handed to the training loader.
let dataPath = Path.GetFullPath(sentimentDataPath)
// Learning pipeline: load the tab-separated training file, featurize the
// SentimentText column into "Features", train a FastTree binary classifier,
// then convert the "PredictedLabel" column back to its original value.
let pipeline =
    let steps = LearningPipeline()

    // Step 1: training data loader (the file has a header row, tab separator).
    steps.Add(TextLoader<SentimentData>(dataPath, useHeader = true, separator = "tab"))

    // Step 2: text featurization using character and word n-gram extractors.
    let charNgrams = NGramNgramExtractor(NgramLength = 2, AllLengths = true)
    let wordNgrams = NGramNgramExtractor(NgramLength = 3, AllLengths = false)
    let featurizer =
        TextFeaturizer("Features", "SentimentText",
                       KeepDiacritics = false,
                       KeepPunctuations = false,
                       TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
                       OutputTokens = true,
                       StopWordsRemover = PredefinedStopWordsRemover(),
                       VectorNormalizer = TextTransformTextNormKind.L2,
                       CharFeatureExtractor = charNgrams,
                       WordFeatureExtractor = wordNgrams)
    steps.Add(featurizer)

    // Step 3: the learner.
    steps.Add(FastTreeBinaryClassifier(NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2))

    // Step 4: convert the predicted label column back to its original value.
    steps.Add(PredictedLabelColumnOriginalValueConverter(PredictedLabelColumn = "PredictedLabel"))
    steps

// Train the pipeline, mapping SentimentData inputs to SentimentPrediction outputs.
let model = pipeline.Train<SentimentData, SentimentPrediction>()
// Two sample comments wrapped as SentimentData rows for ad-hoc prediction.
let sentiments =
    [ for text in [ "Please refrain from adding nonsense to Wikipedia."
                    "He is a CHEATER, and the article should say that." ] ->
        SentimentData(text) ]

// Run the trained model over the sample rows.
let predictions = model.Predict(sentiments)
// Evaluate the trained model against the held-out test file
// (same layout as the training file: header row, tab separated).
let testData = TextLoader<SentimentData>(sentimentTestPath, useHeader = true, separator = "tab")
let evaluator = BinaryClassificationEvaluator()
let metrics = evaluator.Evaluate(model, testData)
// Confusion matrix taken from the evaluation metrics.
let matrix = metrics.ConfusionMatrix
// (End of gist; the GitHub page footer "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment" is not part of the script.)