Skip to content

Instantly share code, notes, and snippets.

@paralax
Last active September 29, 2019 13:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paralax/9ac5d8fda47ec506ed74a5b88525e6e6 to your computer and use it in GitHub Desktop.
Save paralax/9ac5d8fda47ec506ed74a5b88525e6e6 to your computer and use it in GitHub Desktop.
recog_match for recog in golang
package main
// # build
// $ go get github.com/hdm/recog-go
// $ go build
// # install recog for the XML files
// $ git clone https://github.com/hdm/recog.git
// # usage:
// $ echo "Apache/2.4.33 (Unix) OpenSSL/1.0.2o" | ./recog_match recog/xml
import (
"bufio"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"strings"
recog "github.com/hdm/recog-go"
)
// visit returns a filepath.WalkFunc that appends the path of every
// non-directory entry whose extension is exactly ".xml" to *files.
//
// An error handed to the callback by the walk is returned unchanged. That
// stops the walk and surfaces the error to the caller of filepath.Walk,
// instead of terminating the process from inside the callback.
func visit(files *[]string) filepath.WalkFunc {
	return func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Check err before touching info: filepath.Walk documents that
			// info can be nil when it reports an error for an entry.
			return err
		}
		if info.IsDir() || filepath.Ext(path) != ".xml" {
			return nil
		}
		*files = append(*files, path)
		return nil
	}
}
// fingerprint splits text into whitespace-separated terms and runs every
// term through MatchAll on each database in fingerprints.
//
// For each result flagged as matched, the result's Values are written to
// standard output as one JSON document per line. Errors attached to a result
// are reported through the standard logger rather than printed inline on
// stdout, so they cannot run into the JSON lines. A JSON encoding failure is
// logged and that result is skipped.
func fingerprint(fingerprints []recog.FingerprintDB, text string) {
	for _, term := range strings.Fields(text) {
		for _, fdb := range fingerprints {
			for _, match := range fdb.MatchAll(term) {
				if len(match.Errors) > 0 {
					log.Printf("%q", match.Errors)
				}
				if !match.Matched {
					continue
				}
				j, err := json.Marshal(match.Values)
				if err != nil {
					log.Printf("encoding match values for %q: %v", term, err)
					continue
				}
				fmt.Printf("%s\n", j)
			}
		}
	}
}
// main loads the fingerprint databases found under the directory named by
// the first command-line argument and matches input text against them.
//
// The input text is the remaining command-line arguments joined with single
// spaces. When there are none, every line read from standard input is
// matched in turn. Any failure while walking the directory, loading a
// database, or reading standard input is logged and terminates the program.
func main() {
	if len(os.Args) < 2 {
		log.Fatalf("missing: recog xml directory")
	}

	var files []string
	if err := filepath.Walk(os.Args[1], visit(&files)); err != nil {
		log.Fatal(err)
	}

	fingerprints := make([]recog.FingerprintDB, 0, len(files))
	for _, file := range files {
		fdb, err := recog.LoadFingerprintDBFromFile(file)
		if err != nil {
			log.Fatalf("error loading fingerprints from %s: %s", file, err)
		}
		fingerprints = append(fingerprints, fdb)
	}

	// Text given on the command line takes precedence over stdin.
	if text := strings.Join(os.Args[2:], " "); len(text) > 0 {
		fingerprint(fingerprints, text)
		return
	}

	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		fingerprint(fingerprints, scanner.Text())
	}
	// Scan returns false on both EOF and failure; only Err tells them apart,
	// so check it to avoid silently stopping on a read error or an over-long
	// line.
	if err := scanner.Err(); err != nil {
		log.Fatalf("reading stdin: %v", err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment