Created
January 1, 2020 08:05
-
-
Save gsscoder/38b06bf6ab8596ce63ac09f7086f6f62 to your computer and use it in GitHub Desktop.
F# program that demonstrates the use of PickAll with the Stanford NLP POS Tagger
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* | |
Demonstrates the use of PickAll with the Stanford NLP POS Tagger | |
PickAll: | |
- https://github.com/gsscoder/pickall | |
Derived from: | |
- http://sergey-tihon.github.io/Stanford.NLP.NET/samples/POSTagger.Sample.html | |
TargetFramework: | |
- net452 | |
References: | |
- netstandard | |
- System.Text.Encoding.CodePages | |
- PickAll | |
- AngleSharp | |
POS tagger: | |
- https://github.com/gsscoder/test-data/blob/master/pos-taggers/wsj-0-18-bidirectional-nodistsim.tagger | |
*) | |
open System.IO | |
open java.io | |
open java.util | |
open edu.stanford.nlp.ling | |
open edu.stanford.nlp.tagger.maxent | |
open PickAll | |
open PickAll.Searchers | |
open PickAll.PostProcessors | |
// Path to the POS tagger model file, looked up in the current directory.
let model = Path.Combine(".", "wsj-0-18-bidirectional-nodistsim.tagger")

// Fail fast with a descriptive message when the model file is missing.
// File is fully qualified because java.io (opened alongside System.IO)
// also declares a type named File.
if not (System.IO.File.Exists model) then
    failwithf "Check path to the model file '%s'" model

// Build the POS tagger from the model file.
let tagger = MaxentTagger(model)
/// Tokenizes the text supplied by `reader`, tags every resulting sentence with
/// the module-level `tagger`, and returns the tagged sentences joined by a
/// single space.
let tagTexrFromReader (reader:Reader) =
    // Tags one tokenized sentence and renders it as text. Each element of the
    // tokenizer output is downcast to ArrayList; the cast throws if the
    // element has another type.
    let renderTagged (tokens: obj) =
        let tagged = tagger.tagSentence(tokens :?> ArrayList)
        sprintf @"%O" (SentenceUtils.listToString(tagged, false))
    MaxentTagger.tokenizeText(reader).toArray()
    |> Seq.map renderTagged
    |> String.concat " "
// Text for tagging
/// At most 50 words collected from the search results for "steve jobs",
/// joined by single spaces. The search runs synchronously when this value is
/// initialized.
let textFromWeb =
    // Search pipeline: two searchers plus the Wordify post-processor, which is
    // configured to include result titles.
    let context =
        (new SearchContext(maximumResults=10u))
            .With<Google>()
            .With<Yahoo>()
            .With<Wordify>(new WordifySettings(IncludeTitle = true))
    // Block until the asynchronous search has completed.
    let results =
        context.SearchAsync("steve jobs")
        |> Async.AwaitTask
        |> Async.RunSynchronously
    results
    // Each result's Data is downcast to WordifyData; the cast throws if a
    // result carries a different payload.
    |> Seq.collect (fun x -> (x.Data :?> WordifyData).Words)
    // Seq.truncate, unlike Seq.take, does not raise when fewer than 50 words
    // are available; it simply yields what is there.
    |> Seq.truncate 50
    |> String.concat " "
/// Program entry point: tags the text gathered from the web, prints the tagged
/// text to standard output, and returns exit code 0. `argv` is not used.
[<EntryPoint>]
let main argv =
    // Wrap the collected text in a reader so the tagger can consume it.
    let reader = new StringReader(textFromWeb)
    tagTexrFromReader reader |> printfn "%s"
    0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment