Skip to content

Instantly share code, notes, and snippets.

@pressure679
Created October 15, 2020 16:54
Show Gist options
  • Save pressure679/b1c38fe15a68bbc17eeda5f568a4e828 to your computer and use it in GitHub Desktop.
Save pressure679/b1c38fe15a68bbc17eeda5f568a4e828 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"os"
"io/ioutil"
"flag"
"github.com/DavidBelicza/TextRank"
"github.com/DavidBelicza/TextRank/rank"
)
func main() {
file := flag.String("file", "", "file to read")
ngramlength := flag.Int("ngramlength", -1, "length of ngram (n)")
flag.Parse()
if *ngramlength == -1 { fmt.Println("argument \"ngramlength\" must be given (must be between 2 and 7)"); os.Exit(1) }
osfile, err := os.Open(*file)
if err != nil { panic(err) }
data, err := ioutil.ReadAll(osfile)
if err != nil { panic(err) }
tr := textrank.NewTextRank()
rule := textrank.NewDefaultRule()
language := textrank.NewDefaultLanguage()
algorithmDef := textrank.NewDefaultAlgorithm()
tr.Populate(string(data), language, rule)
tr.Ranking(algorithmDef)
rankedwords := textrank.FindSingleWords(tr)
for i := 0; i <= len(rankedwords) / *ngramlength; i += *ngramlength {
var rankedsentencesbyphrasechain []rank.Sentence
if *ngramlength == 2 {
rankedsentencesbyphrasechain = textrank.FindSentencesByPhraseChain(tr, []string{rankedwords[i].Word, rankedwords[i+1].Word})
if len(rankedsentencesbyphrasechain) == 0 { continue }
fmt.Println("*", rankedwords[i].Word + ", " + rankedwords[i+1].Word)
for num, _ := range rankedsentencesbyphrasechain {
fmt.Println("- ", rankedsentencesbyphrasechain[num].Value)
}
} else if *ngramlength == 3 {
rankedsentencesbyphrasechain = textrank.FindSentencesByPhraseChain(tr, []string{rankedwords[i].Word, rankedwords[i+1].Word, rankedwords[i+2].Word})
if len(rankedsentencesbyphrasechain) == 0 { continue }
fmt.Println("*", rankedwords[i].Word + ", " + rankedwords[i+1].Word + ", " + rankedwords[i+2].Word)
for num, _ := range rankedsentencesbyphrasechain {
fmt.Println("- ", rankedsentencesbyphrasechain[num].Value)
}
} else if *ngramlength == 4 {
rankedsentencesbyphrasechain = textrank.FindSentencesByPhraseChain(tr, []string{rankedwords[i].Word, rankedwords[i+1].Word, rankedwords[i+2].Word, rankedwords[i+3].Word})
if len(rankedsentencesbyphrasechain) == 0 { continue }
fmt.Println("*", rankedwords[i].Word + ", " + rankedwords[i+1].Word + ", " + rankedwords[i+2].Word + ", " + rankedwords[i+3].Word)
for num, _ := range rankedsentencesbyphrasechain {
fmt.Println("- ", rankedsentencesbyphrasechain[num].Value)
}
} else if *ngramlength == 5 {
rankedsentencesbyphrasechain = textrank.FindSentencesByPhraseChain(tr, []string{rankedwords[i].Word, rankedwords[i+1].Word, rankedwords[i+2].Word, rankedwords[i+3].Word, rankedwords[i+4].Word})
if len(rankedsentencesbyphrasechain) == 0 { continue }
fmt.Println("*", rankedwords[i].Word + ", " + rankedwords[i+1].Word + ", " + rankedwords[i+2].Word + ", " + rankedwords[i+3].Word + ", " + rankedwords[i+4].Word)
for num, _ := range rankedsentencesbyphrasechain {
fmt.Println("- ", rankedsentencesbyphrasechain[num].Value)
}
} else if *ngramlength == 6 {
rankedsentencesbyphrasechain = textrank.FindSentencesByPhraseChain(tr, []string{rankedwords[i].Word, rankedwords[i+1].Word, rankedwords[i+2].Word, rankedwords[i+3].Word, rankedwords[i+4].Word, rankedwords[i+5].Word})
if len(rankedsentencesbyphrasechain) == 0 { continue }
fmt.Println("*", rankedwords[i].Word + ", " + rankedwords[i+1].Word + ", " + rankedwords[i+2].Word + ", " + rankedwords[i+3].Word + ", " + rankedwords[i+4].Word + ", " + rankedwords[i+5].Word)
for num, _ := range rankedsentencesbyphrasechain {
fmt.Println("- ", rankedsentencesbyphrasechain[num].Value)
}
} else if *ngramlength == 7 {
rankedsentencesbyphrasechain = textrank.FindSentencesByPhraseChain(tr, []string{rankedwords[i].Word, rankedwords[i+1].Word, rankedwords[i+2].Word, rankedwords[i+3].Word, rankedwords[i+4].Word, rankedwords[i+5].Word, rankedwords[i+6].Word})
if len(rankedsentencesbyphrasechain) == 0 { continue }
fmt.Println("*", rankedwords[i].Word + ", " + rankedwords[i+1].Word + ", " + rankedwords[i+2].Word + ", " + rankedwords[i+3].Word + ", " + rankedwords[i+4].Word + ", " + rankedwords[i+5].Word + ", " + rankedwords[i+6].Word)
for num, _ := range rankedsentencesbyphrasechain {
fmt.Println("- ", rankedsentencesbyphrasechain[num].Value)
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment