Created
May 9, 2018 19:49
-
-
Save kevmal/07c0a27ef83282e799cf4e3962311594 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r @"..\packages\NETStandard.Library.NETFramework.2.0.0-preview2-25405-01\build\net461\lib\netstandard.dll" | |
#r @"..\packages\Google.Protobuf.3.5.1\lib\net45\Google.Protobuf.dll" //Google.Protobuf | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.dll" //Microsoft.ML | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Api.dll" //Microsoft.ML.Api | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Core.dll" //Microsoft.ML.Core | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.CpuMath.dll" //Microsoft.ML.CpuMath | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Data.dll" //Microsoft.ML.Data | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.FastTree.dll" //Microsoft.ML.FastTree | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.InternalStreams.dll" //Microsoft.ML.InternalStreams | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.KMeansClustering.dll" //Microsoft.ML.KMeansClustering | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Maml.dll" //Microsoft.ML.Maml | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.PCA.dll" //Microsoft.ML.PCA | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.PipelineInference.dll" //Microsoft.ML.PipelineInference | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.ResultProcessor.dll" //Microsoft.ML.ResultProcessor | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.StandardLearners.dll" //Microsoft.ML.StandardLearners | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Sweeper.dll" //Microsoft.ML.Sweeper | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.Transforms.dll" //Microsoft.ML.Transforms | |
#r @"..\packages\Microsoft.ML.0.1.0\lib\netstandard2.0\Microsoft.ML.UniversalModelFormat.dll" //Microsoft.ML.UniversalModelFormat | |
#r @"..\packages\Newtonsoft.Json.10.0.3\lib\net45\Newtonsoft.Json.dll" //Newtonsoft.Json | |
#r @"..\packages\NuGet.Client.4.0.0\lib\net45\NuGet.Client.dll" //NuGet.Client | |
#r @"..\packages\NuGet.Common.4.0.0\lib\net45\NuGet.Common.dll" //NuGet.Common | |
#r @"..\packages\NuGet.ContentModel.4.0.0\lib\net45\NuGet.ContentModel.dll" //NuGet.ContentModel | |
#r @"..\packages\NuGet.Frameworks.4.0.0\lib\net45\NuGet.Frameworks.dll" //NuGet.Frameworks | |
#r @"..\packages\NuGet.Packaging.4.0.0\lib\net45\NuGet.Packaging.dll" //NuGet.Packaging | |
#r @"..\packages\NuGet.Packaging.Core.4.0.0\lib\net45\NuGet.Packaging.Core.dll" //NuGet.Packaging.Core | |
#r @"..\packages\NuGet.Packaging.Core.Types.4.0.0\lib\net45\NuGet.Packaging.Core.Types.dll" //NuGet.Packaging.Core.Types | |
#r @"..\packages\NuGet.Repositories.4.0.0\lib\net45\NuGet.Repositories.dll" //NuGet.Repositories | |
#r @"..\packages\NuGet.RuntimeModel.4.0.0\lib\net45\NuGet.RuntimeModel.dll" //NuGet.RuntimeModel | |
#r @"..\packages\NuGet.Versioning.4.0.0\lib\net45\NuGet.Versioning.dll" //NuGet.Versioning | |
#r @"..\packages\System.CodeDom.4.4.0\lib\net461\System.CodeDom.dll" //System.CodeDom | |
#r @"..\packages\System.Threading.Tasks.Dataflow.4.8.0\lib\netstandard2.0\System.Threading.Tasks.Dataflow.dll" //System.Threading.Tasks.Dataflow | |
#r @"..\packages\System.ValueTuple.4.4.0\lib\net47\System.ValueTuple.dll" //System.ValueTuple | |
open Microsoft.ML.Models | |
open Microsoft.ML.Runtime | |
open Microsoft.ML.Runtime.Api | |
open Microsoft.ML.Trainers | |
open Microsoft.ML.Transforms | |
open System.Net | |
open System.IO | |
open Microsoft.ML | |
open System | |
// Directory containing the native CpuMathNative library from the Microsoft.ML package.
// NOTE(review): this is an absolute C:\ path, while the #r references in this script
// use the relative ..\packages folder — confirm it matches the local package layout.
let libDir = @"C:\packages\Microsoft.ML.0.1.0\runtimes\win-x64\native"
// Prepend libDir to the PATH environment variable (presumably so the native
// library can be resolved when Microsoft.ML loads it).
Environment.GetEnvironmentVariable "PATH"
|> sprintf "%s%c%s" libDir Path.PathSeparator
|> fun updatedPath -> Environment.SetEnvironmentVariable("PATH", updatedPath)
/// Ensures a file called `name` exists locally and returns `name`.
/// When the file is missing it is downloaded from the dotnet/machinelearning
/// test-data folder on GitHub and written to `name`, with progress printed to stdout.
/// Exceptions raised by the download are not caught here.
let file name =
    if not (File.Exists name) then
        use client = new WebClient()
        let url = sprintf "https://raw.githubusercontent.com/dotnet/machinelearning/master/test/data/%s" name
        printfn "%s not found." name
        printfn "Downloading... %s" url
        client.DownloadFile(url, name)
        printfn "Done"
    // Same value is returned whether or not a download was needed.
    name
/// Input row type for the sentiment pipeline. The Column attributes carry the
/// ordinal (and optional name) of the text-file column each field is tied to.
type SentimentData =
    /// Column ordinal "0", named "Label".
    [<Column(ordinal = "0", name = "Label")>]
    val mutable Sentiment : float

    /// Column ordinal "1": the text whose sentiment is being classified.
    [<Column(ordinal = "1")>]
    val mutable SentimentText : string

    /// Creates a row with Sentiment 0.0 and empty text.
    new() = { Sentiment = 0.0; SentimentText = "" }

    /// Creates a row with Sentiment 0.0 and the given text.
    new(txt : string) = { Sentiment = 0.0; SentimentText = txt }
/// Output row type for the sentiment pipeline; holds a single boolean field
/// tagged with the column name "PredictedLabel".
type SentimentPrediction =
    /// Predicted value read from the "PredictedLabel" column.
    [<ColumnName("PredictedLabel")>] val mutable Sentiment : bool

    /// Creates a prediction with Sentiment initialised to false.
    new() = { Sentiment = false }
// Resolve the training and test data sets (downloading them via `file` if missing).
let sentimentDataPath = "wikipedia-detox-250-line-data.tsv" |> file
let sentimentTestPath = "wikipedia-detox-250-line-test.tsv" |> file
// Absolute path of the training file, used when loading the training data.
let dataPath = Path.GetFullPath(sentimentDataPath)
// Build the learning pipeline: text loader -> text featurizer -> FastTree binary
// classifier -> predicted-label converter. Stages are added in that order.
let pipeline =
    let stages = LearningPipeline()

    // Training data: tab-separated file with a header row.
    stages.Add(TextLoader<SentimentData>(dataPath, useHeader = true, separator = "tab"))

    // Turn the "SentimentText" column into the "Features" column.
    let featurizer = TextFeaturizer("Features", "SentimentText")
    featurizer.KeepDiacritics <- false
    featurizer.KeepPunctuations <- false
    featurizer.TextCase <- TextNormalizerTransformCaseNormalizationMode.Lower
    featurizer.OutputTokens <- true
    featurizer.StopWordsRemover <- PredefinedStopWordsRemover()
    featurizer.VectorNormalizer <- TextTransformTextNormKind.L2
    featurizer.CharFeatureExtractor <- NGramNgramExtractor(NgramLength = 2, AllLengths = true)
    featurizer.WordFeatureExtractor <- NGramNgramExtractor(NgramLength = 3, AllLengths = false)
    stages.Add(featurizer)

    // Small FastTree model: 5 trees, 5 leaves each, at least 2 documents per leaf.
    let learner = FastTreeBinaryClassifier()
    learner.NumLeaves <- 5
    learner.NumTrees <- 5
    learner.MinDocumentsInLeafs <- 2
    stages.Add(learner)

    let labelConverter = PredictedLabelColumnOriginalValueConverter()
    labelConverter.PredictedLabelColumn <- "PredictedLabel"
    stages.Add(labelConverter)

    stages

// Train the pipeline, producing a model from SentimentData to SentimentPrediction.
let model = pipeline.Train<SentimentData, SentimentPrediction>()
// Two ad-hoc sample sentences, each wrapped in a SentimentData row.
let sentiments =
    [ "Please refrain from adding nonsense to Wikipedia."
      "He is a CHEATER, and the article should say that." ]
    |> List.map (fun text -> SentimentData(text))

// Run the trained model over the sample rows.
let predictions = model.Predict(sentiments)
// Evaluate the trained model against the test file (tab-separated, with header row).
let testData = TextLoader<SentimentData>(sentimentTestPath, useHeader = true, separator = "tab")
let evaluator = BinaryClassificationEvaluator()
let metrics = evaluator.Evaluate(model, testData)
// Confusion matrix taken from the evaluation metrics.
let matrix = metrics.ConfusionMatrix
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment