Skip to content

Instantly share code, notes, and snippets.

@gsscoder
Created January 1, 2020 08:05
Show Gist options
  • Save gsscoder/38b06bf6ab8596ce63ac09f7086f6f62 to your computer and use it in GitHub Desktop.
Save gsscoder/38b06bf6ab8596ce63ac09f7086f6f62 to your computer and use it in GitHub Desktop.
F# program that demonstrates the use of PickAll with the Stanford NLP POS Tagger
(*
Demonstrates the use of PickAll with the Stanford NLP POS Tagger
PickAll:
- https://github.com/gsscoder/pickall
Derived from:
- http://sergey-tihon.github.io/Stanford.NLP.NET/samples/POSTagger.Sample.html
TargetFramework:
- net452
References:
- netstandard
- System.Text.Encoding.CodePages
- PickAll
- AngleSharp
POS tagger:
- https://github.com/gsscoder/test-data/blob/master/pos-taggers/wsj-0-18-bidirectional-nodistsim.tagger
*)
open System.IO
open java.io
open java.util
open edu.stanford.nlp.ling
open edu.stanford.nlp.tagger.maxent
open PickAll
open PickAll.Searchers
open PickAll.PostProcessors
// Path to the POS tagger model file, looked up relative to the current directory
let model = Path.Combine(".", "wsj-0-18-bidirectional-nodistsim.tagger")
// Stop right away with a descriptive message when the model file is not there
if not (File.Exists model) then
    failwithf "Check path to the model file '%s'" model
// Shared tagger instance, built once from the model file above
let tagger = new MaxentTagger(model)
/// Tokenizes the text supplied by `reader` with `MaxentTagger.tokenizeText`,
/// runs every resulting sentence through the module-level `tagger`, and
/// returns the string forms of the tagged sentences joined by single spaces.
let tagTexrFromReader (reader:Reader) =
    MaxentTagger.tokenizeText(reader).toArray()
    |> Seq.map (fun tokens ->
        // Each array element is cast to ArrayList, the type handed to tagSentence here
        let tagged = tagger.tagSentence(tokens :?> ArrayList)
        sprintf @"%O" (SentenceUtils.listToString(tagged, false)))
    |> String.concat " "
// Text for tagging
/// At most 50 words taken from the search results for "steve jobs", joined by
/// single spaces. This is a value, not a function: the search runs once,
/// synchronously, when the module is initialised.
let textFromWeb =
    // Search context limited to 10 results, using the Google and Yahoo
    // searchers plus Wordify configured to include titles
    let context =
        (new SearchContext(maximumResults=10u))
            .With<Google>()
            .With<Yahoo>()
            .With<Wordify>(new WordifySettings(IncludeTitle = true))
    // Block until the asynchronous search has completed
    let results =
        context.SearchAsync("steve jobs")
        |> Async.AwaitTask
        |> Async.RunSynchronously
    results
    // NOTE(review): assumes every result carries WordifyData in Data - confirm against PickAll
    |> Seq.collect (fun x -> (x.Data :?> WordifyData).Words)
    // Seq.truncate instead of Seq.take: Seq.take raises InvalidOperationException
    // when fewer than 50 words are available, Seq.truncate simply returns them all
    |> Seq.truncate 50
    |> String.concat " "
/// Program entry point: tags the text held in `textFromWeb`, prints the tagged
/// text followed by a newline, and returns exit code 0. `argv` is not used.
[<EntryPoint>]
let main argv =
    // Wrap the text in a reader, which is what tagTexrFromReader expects
    let reader = new StringReader(textFromWeb)
    let taggedText = tagTexrFromReader reader
    printfn "%s" taggedText
    0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment