Skip to content

Instantly share code, notes, and snippets.

@PaulCapestany
Created October 24, 2016 20:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PaulCapestany/3b559a86e88bd4ae6dd904038c66953e to your computer and use it in GitHub Desktop.
Save PaulCapestany/3b559a86e88bd4ae6dd904038c66953e to your computer and use it in GitHub Desktop.
s3 log parsing
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"regexp"
"time"
)
// Log is one parsed access-log record.
//
// Every field holds text taken from the log line; nothing is converted to a
// numeric type. The field names follow the log's column order (presumably the
// Amazon S3 server access log format -- confirm against the AWS documentation).
// Time is the only field that newLogLine rewrites before storing it.
type Log struct {
	// Who owns the bucket, which bucket, and when the request happened.
	BucketOwner, Bucket, Time string

	// Who issued the request and how it is identified.
	RemoteIP, Requester, RequestID string

	// What was requested.
	Operation, Key, RequestURI string

	// Outcome reported for the request.
	HTTPstatus, ErrorCode string

	// Size and timing columns, kept as the raw text from the log.
	BytesSent, ObjectSize, TotalTime, TurnAroundTime string

	// Client-supplied headers and the trailing version column.
	Referrer, UserAgent, VersionId string
}
// newLogLine builds a Log from the submatches of a single access-log line.
//
// regexString must be a complete submatch slice: index 0 is the whole match
// and indexes 1 through 18 are the captured columns in order. If the slice is
// too short to contain all 18 columns, nil is returned instead of panicking on
// an out-of-range index.
//
// The timestamp column (index 3) is parsed with the layout
// "02/Jan/2006:15:04:05 -0700" and stored as "2006-01-02 15:04:05.000000".
// The output layout carries no zone, so the offset is not preserved. When the
// timestamp cannot be parsed, the raw text is stored unchanged rather than a
// misleading zero time.
func newLogLine(regexString []string) *Log {
	const (
		fieldCount = 18 // captured columns, excluding the whole-match entry at index 0
		inLayout   = "02/Jan/2006:15:04:05 -0700"
		outLayout  = "2006-01-02 15:04:05.000000"
	)

	if len(regexString) <= fieldCount {
		return nil
	}

	stamp := regexString[3]
	if parsed, err := time.Parse(inLayout, stamp); err == nil {
		stamp = parsed.Format(outLayout)
	}

	return &Log{
		BucketOwner:    regexString[1],
		Bucket:         regexString[2],
		Time:           stamp,
		RemoteIP:       regexString[4],
		Requester:      regexString[5],
		RequestID:      regexString[6],
		Operation:      regexString[7],
		Key:            regexString[8],
		RequestURI:     regexString[9],
		HTTPstatus:     regexString[10],
		ErrorCode:      regexString[11],
		BytesSent:      regexString[12],
		ObjectSize:     regexString[13],
		TotalTime:      regexString[14],
		TurnAroundTime: regexString[15],
		Referrer:       regexString[16],
		UserAgent:      regexString[17],
		VersionId:      regexString[18],
	}
}
// logLineRE matches one access-log record and captures its first 18 columns:
// the bracketed timestamp, the three double-quoted columns (request URI,
// referrer, user agent) and fifteen whitespace-delimited tokens. It is not
// anchored, so any columns after the 18th are ignored. It is compiled once
// here rather than on every call to parseLogLine.
var logLineRE = regexp.MustCompile(`(\S+) (\S+) \[(.*?)\] (\S+) (\S+) (\S+) (\S+) (\S+) "([^"]+)" (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) "([^"]+)" "([^"]+)" (\S+)`)

// parseLogLine parses one raw log line and writes a tab-separated row to
// standard output for every record found in it. Lines that do not match the
// expected layout produce no output.
//
// The final capture group is `(\S+)`; the previous `(\S)` kept only the first
// character of the version column.
//
// The row has 17 columns: TurnAroundTime is parsed but not printed, which
// matches the header line written by main.
func parseLogLine(line string) {
	for _, match := range logLineRE.FindAllStringSubmatch(line, -1) {
		entry := newLogLine(match)
		if entry == nil {
			// Not expected for a match of logLineRE, but never dereference nil.
			continue
		}
		fmt.Printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", entry.BucketOwner, entry.Bucket, entry.Time, entry.RemoteIP, entry.Requester, entry.RequestID, entry.Operation, entry.Key, entry.RequestURI, entry.HTTPstatus, entry.ErrorCode, entry.BytesSent, entry.ObjectSize, entry.TotalTime, entry.Referrer, entry.UserAgent, entry.VersionId)
	}
}
// getSliceFromTextFile reads fileName and returns its lines in order, with the
// trailing "\n" or "\r\n" removed from each. An empty file yields an empty,
// non-nil slice.
//
// Any failure to open or read the file terminates the program via log.Fatal.
// Lines longer than the reader's internal buffer are reassembled from their
// fragments instead of aborting the program.
func getSliceFromTextFile(fileName string) []string {
	f, err := os.Open(fileName)
	if err != nil {
		// The *PathError from os.Open already names the operation and the path.
		log.Fatal(err)
	}
	defer f.Close()

	bf := bufio.NewReader(f)
	// initialize slice to hold all items
	itemsSlice := []string{}
	// current accumulates the fragments of the line being read. ReadLine
	// returns a view into the reader's buffer, so the bytes must be copied
	// before the next call anyway.
	var current []byte
	for {
		fragment, isPrefix, err := bf.ReadLine()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		current = append(current, fragment...)
		if isPrefix {
			// More of this line is still to come; keep accumulating.
			continue
		}
		itemsSlice = append(itemsSlice, string(current))
		current = current[:0]
	}
	return itemsSlice
}
// main reads the log file named by the first positional argument, prints a
// tab-separated header line to standard output, and then prints one row for
// each parseable log line.
//
// When no file argument is given it prints a usage message to standard error
// and exits with status 2 instead of panicking on an empty argument list.
func main() {
	flag.Parse()
	args := flag.Args()
	if len(args) < 1 {
		fmt.Fprintf(os.Stderr, "usage: %s <access-log-file>\n", os.Args[0])
		os.Exit(2)
	}
	filename := args[0]

	// The header lists the same 17 columns, in the same order, as the rows
	// written by parseLogLine.
	fmt.Printf("BucketOwner\tBucket\tTime\tRemoteIP\tRequester\tRequestID\tOperation\tKey\tRequestURI\tHTTPstatus\tErrorCode\tBytesSent\tObjectSize\tTotalTime\tReferrer\tUserAgent\tVersionId\n")

	for _, line := range getSliceFromTextFile(filename) {
		parseLogLine(line)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment