Skip to content

Instantly share code, notes, and snippets.

@pressure679
Created September 19, 2019 14:30
Show Gist options
  • Save pressure679/abd6f646f94fc1c00926a94cd6570f45 to your computer and use it in GitHub Desktop.
Save pressure679/abd6f646f94fc1c00926a94cd6570f45 to your computer and use it in GitHub Desktop.
Extract nouns from an English text file
package main
import (
"fmt"
"os"
"github.com/mvryan/fasttag"
// "github.com/neurosnap/sentences"
// "github.com/neurosnap/sentences/data"
// "sort"
"io/ioutil"
// "math"
"strings"
"flag"
)
// main reads the text file named by the -file flag, runs its contents through
// the fasttag tokenizer and Brill tagger, and prints to standard output, one
// per line, every word whose tag starts with "N" (case-insensitively).
//
// It exits with status 2 and a usage message when -file is missing, and with
// status 1 and the underlying error when the file cannot be opened or read.
func main() {
	path := flag.String("file", "", "file with or without path variable.")
	flag.Parse()
	if *path == "" {
		// Without this check os.Open("") fails with an unhelpful message.
		flag.Usage()
		os.Exit(2)
	}

	osFile, err := os.Open(*path)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	// Read through the handle that is already open instead of re-opening the
	// file by name, and close it right away: nothing below needs it, and
	// os.Exit would skip a deferred Close.
	data, err := ioutil.ReadAll(osFile)
	_ = osFile.Close() // read-only handle; a close error cannot lose data
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	words := fasttag.WordsToSlice(string(data))
	// NOTE(review): presumably BrillTagger returns one part-of-speech tag per
	// word, with noun tags beginning with "N" -- confirm against fasttag.
	posTags := fasttag.BrillTagger(words)
	for i, word := range words {
		if i >= len(posTags) {
			// Fewer tags than words: stop rather than index out of range.
			break
		}
		tag := posTags[i]
		// Guard against an empty tag before looking at its first byte.
		if len(tag) > 0 && strings.EqualFold(tag[:1], "N") {
			fmt.Println(word)
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment