Skip to content

Instantly share code, notes, and snippets.

@joerussbowman
Created September 5, 2012 16:48
Show Gist options
  • Save joerussbowman/3639754 to your computer and use it in GitHub Desktop.
Save joerussbowman/3639754 to your computer and use it in GitHub Desktop.
Apache parsing script
package main
import (
"bufio"
"bytes"
"compress/gzip"
"flag"
"fmt"
"io"
"log"
"os"
"regexp"
"strings"
)
// Command-line flags. Each one holds a file path and defaults to the empty
// string.
var (
	// matchesFile is the -m flag: path of the file listing the matches.
	matchesFile = flag.String("m", "", "Full path to file to use for matches")

	// filesFile is the -f flag: path of the file listing the files to parse.
	filesFile = flag.String("f", "", "Full path to file to use for files list")

	// outputFile is the -o flag: path of the file to use for output.
	outputFile = flag.String("o", "", "Full path to file to use for output")
)
// readLines loads the text file at path and returns one map entry per line.
//
// The trailing line terminator ("\n" or "\r\n") is removed from every line.
// When isMatches is true the line is wrapped as "GET " + line + " "; otherwise
// it is used verbatim. The resulting string is the map key and its byte form
// is the value, so duplicate lines collapse into a single entry. Blank lines
// are kept and produce an entry like any other line.
//
// Lines may be arbitrarily long. If the file cannot be opened or read, a nil
// map and a non-nil error describing the failure are returned.
func readLines(path string, isMatches bool) (lines map[string][]byte, err error) {
	var prefix, suffix string
	if isMatches {
		prefix = "GET "
		suffix = " "
	}

	f, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("opening %q: %w", path, err)
	}
	// Registered only after a successful open so Close is never called on a
	// nil *os.File.
	defer f.Close()

	lines = make(map[string][]byte)
	r := bufio.NewReader(f)
	for {
		// ReadString grows its result as needed, so there is no line-length
		// limit. It returns the final unterminated line together with io.EOF.
		raw, readErr := r.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			return nil, fmt.Errorf("reading %q: %w", path, readErr)
		}
		if raw != "" {
			text := raw
			if strings.HasSuffix(text, "\n") {
				// Strip "\n", plus a preceding "\r" for CRLF files.
				text = strings.TrimSuffix(text[:len(text)-1], "\r")
			}
			key := prefix + text + suffix
			lines[key] = []byte(key)
		}
		if readErr == io.EOF {
			return lines, nil
		}
	}
}
// crawlerPattern matches any line containing one of the listed substrings
// (case-sensitive). parseFile skips such lines. It is compiled once at
// package initialisation instead of on every parseFile call.
var crawlerPattern = regexp.MustCompile("bot|index|spider|crawl|wget|slurp|Mediapartners-Google")

// parseFile scans the gzip-compressed file at path line by line and removes
// from matches every entry whose value occurs as a substring of a line.
//
// Lines matching crawlerPattern are ignored. Progress is printed to standard
// output: one message per line read, and two messages per match removed.
//
// matches is modified in place. Scanning is done synchronously on the calling
// goroutine, so the map is fully updated by the time parseFile returns and is
// never accessed concurrently. The final line is processed even when it has
// no trailing newline.
//
// A non-nil error is returned when the file cannot be opened, is not valid
// gzip data, or cannot be read to the end.
func parseFile(matches map[string][]byte, path string) (err error) {
	fmt.Println("Parsing ", path, "with ", len(matches), "matches.")

	f, err := os.Open(path)
	if err != nil {
		fmt.Println("Problem opening ", path)
		return err
	}
	defer f.Close()

	gz, err := gzip.NewReader(f)
	if err != nil {
		return fmt.Errorf("reading gzip header of %q: %w", path, err)
	}
	defer gz.Close()

	br := bufio.NewReader(gz)
	for lineNumber := 1; ; lineNumber++ {
		fmt.Println("Starting on line number ", lineNumber)

		// ReadBytes returns any trailing unterminated data together with
		// io.EOF, so the data is handled before the EOF check below.
		line, readErr := br.ReadBytes('\n')
		if readErr != nil && readErr != io.EOF {
			return fmt.Errorf("reading %q at line %d: %w", path, lineNumber, readErr)
		}

		if len(line) > 0 && !crawlerPattern.Match(line) {
			// Deleting entries while ranging over a map is permitted by the
			// Go specification.
			for key, needle := range matches {
				if bytes.Contains(line, needle) {
					fmt.Println("Match found!")
					delete(matches, key)
					fmt.Println(len(matches), " beers on the wall")
				}
			}
		}

		if readErr == io.EOF {
			return nil
		}
	}
}
// main validates the command-line flags, loads the match list and the list of
// files to parse, and runs parseFile over every listed file with the shared
// match map.
//
// All three flags (-m, -f, -o) are required. A failure to load either list
// ends the program. A failure while parsing one file is reported and the
// remaining files are still processed.
func main() {
	flag.Parse()
	if *matchesFile == "" {
		log.Fatal("Must provide matches file")
	}
	if *filesFile == "" {
		log.Fatal("Must provide files list")
	}
	if *outputFile == "" {
		log.Fatal("Must provide output file")
	}

	// Load the matches into a map keyed by the match string.
	matches, err := readLines(*matchesFile, true)
	if err != nil {
		log.Fatalf("Error: %s", err)
	}

	// Load the files to work on; the map keys are the file paths.
	files, err := readLines(*filesFile, false)
	if err != nil {
		log.Fatalf("Error: %s", err)
	}

	// Run through the files. Map iteration order is unspecified, so the files
	// are visited in no particular order. Each error is reported as it occurs
	// so that a later success cannot hide an earlier failure.
	for file := range files {
		if err := parseFile(matches, file); err != nil {
			fmt.Printf("Error: %s\n", err)
		}
	}

	// NOTE(review): -o is required above but nothing is written to it yet;
	// the final report below is still disabled.
	// print the final matches
	//for match, _ := range matches {
	// fmt.Println(match)
	// }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment