Skip to content

Instantly share code, notes, and snippets.

@david415
Created December 17, 2019 05:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save david415/8523f189ed6c49d08c112d5ba236bc9b to your computer and use it in GitHub Desktop.
Save david415/8523f189ed6c49d08c112d5ba236bc9b to your computer and use it in GitHub Desktop.
analytics.go
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
"time"
)
// parseFile reads inputFile and splits every non-blank line into its
// whitespace-separated fields, returning one []string per line.
//
// Callers in this file expect each record to hold, in order:
// date, timestamp, userid, countryid, siteid. The number of fields is not
// validated here.
//
// Blank (or whitespace-only) lines are skipped so that callers never receive
// an empty record. Lines are read whole regardless of their length, and the
// file is closed before the function returns.
func parseFile(inputFile string) ([][]string, error) {
	file, err := os.Open(inputFile)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	reader := bufio.NewReader(file)
	output := make([][]string, 0)
	for {
		// ReadString, unlike ReadLine, never hands back a partial line when
		// the line is longer than the reader's internal buffer.
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}
		// A final line without a trailing newline arrives together with
		// io.EOF, so the data is processed before the EOF check below.
		if fields := strings.Fields(line); len(fields) > 0 {
			output = append(output, fields)
		}
		if err == io.EOF {
			break
		}
	}
	return output, nil
}
// BDVPopularity returns the site that was visited by the largest number of
// distinct users among the records whose country is "BDV".
//
// Each record must hold at least five fields in the order
// date, timestamp, userid, countryid, siteid; a shorter record yields an
// error instead of an index-out-of-range panic.
//
// When several sites share the highest distinct-user count, the
// lexicographically smallest site is returned so the result does not depend
// on map iteration order. If no record has country "BDV", the empty string is
// returned with a nil error.
func BDVPopularity(input [][]string) (string, error) {
	const (
		userField    = 2
		countryField = 3
		siteField    = 4
		minFields    = 5
	)

	// site -> set of users seen on that site
	usersBySite := make(map[string]map[string]struct{})
	for i, fields := range input {
		if len(fields) < minFields {
			return "", fmt.Errorf("record %d: got %d fields, want at least %d", i, len(fields), minFields)
		}
		if fields[countryField] != "BDV" {
			continue
		}
		site, user := fields[siteField], fields[userField]
		users, ok := usersBySite[site]
		if !ok {
			users = make(map[string]struct{})
			usersBySite[site] = users
		}
		users[user] = struct{}{}
	}

	best := 0
	winner := ""
	for site, users := range usersBySite {
		n := len(users)
		// The second clause breaks ties deterministically.
		if n > best || (n == best && site < winner) {
			best, winner = n, site
		}
	}
	return winner, nil
}
// SiteCount pairs a site identifier with a tally for that site.
type SiteCount struct {
	site  string
	count int
}

// SiteCounts is a list of SiteCount values. Its Len, Swap and Less methods
// order the list by ascending count.
type SiteCounts []SiteCount

// Len reports the number of entries in the list.
func (sc SiteCounts) Len() int {
	return len(sc)
}

// Swap exchanges the entries at positions i and j.
func (sc SiteCounts) Swap(i, j int) {
	tmp := sc[i]
	sc[i] = sc[j]
	sc[j] = tmp
}

// Less reports whether the entry at i has a strictly smaller count than the
// entry at j.
func (sc SiteCounts) Less(i, j int) bool {
	return sc[j].count > sc[i].count
}
// hit describes one visit: which site was visited and when.
type hit struct {
	// site is the identifier of the visited site.
	site string
	// time is the instant at which the visit happened.
	time time.Time
}
// ParseDateTime combines a date written as "YYYY-MM-DD" and a time of day
// written as "HH:MM:SS" into a single time.Time in UTC.
//
// An error is returned, instead of panicking, when either string does not
// consist of exactly three separator-delimited components, or when a
// component is not a base-10 integer.
//
// Component ranges are not checked: the parsed numbers are handed to
// time.Date, which normalizes out-of-range values (for example, month 13
// becomes January of the following year).
func ParseDateTime(dateStr, timeStr string) (time.Time, error) {
	ymd, err := parseIntTriple(dateStr, "-", "date")
	if err != nil {
		return time.Time{}, err
	}
	hms, err := parseIntTriple(timeStr, ":", "time")
	if err != nil {
		return time.Time{}, err
	}
	return time.Date(ymd[0], time.Month(ymd[1]), ymd[2], hms[0], hms[1], hms[2], 0, time.UTC), nil
}

// parseIntTriple splits s on sep and converts its three components to ints.
// kind names the value ("date" or "time") in error messages.
func parseIntTriple(s, sep, kind string) ([3]int, error) {
	var out [3]int
	parts := strings.Split(s, sep)
	if len(parts) != len(out) {
		return [3]int{}, fmt.Errorf("invalid %s %q: want %d fields separated by %q, got %d",
			kind, s, len(out), sep, len(parts))
	}
	for i, p := range parts {
		n, err := strconv.Atoi(p)
		if err != nil {
			return [3]int{}, fmt.Errorf("invalid %s %q: %w", kind, s, err)
		}
		out[i] = n
	}
	return out, nil
}
// UsersFirstLastCount returns the number of users whose chronologically first
// and last visits were to the same site.
//
// Each record must hold at least five fields in the order
// date, timestamp, userid, countryid, siteid; a shorter record or an
// unparsable date/time yields an error.
//
// As a side effect it prints two reports to standard output:
//   - every (user, site) pair with more than 10 visits between
//     2019-02-03 00:00:00 UTC and 2019-02-04 23:59:59 UTC, both inclusive;
//   - for every site, the number of users whose last visit was to that site,
//     ordered by descending count.
//
// Both reports are emitted in a deterministic order (ties are ordered by
// user/site name) rather than in map iteration order.
func UsersFirstLastCount(input [][]string) (int, error) {
	const minFields = 5

	// user -> earliest / latest hit seen so far
	firstMap := make(map[string]hit)
	lastMap := make(map[string]hit)
	// user -> site -> number of hits inside the window
	windowUsers := make(map[string]map[string]int)

	windowStart := time.Date(2019, time.February, 3, 0, 0, 0, 0, time.UTC)
	windowEnd := time.Date(2019, time.February, 4, 23, 59, 59, 0, time.UTC)

	for i, fields := range input {
		if len(fields) < minFields {
			return 0, fmt.Errorf("record %d: got %d fields, want at least %d", i, len(fields), minFields)
		}
		t, err := ParseDateTime(fields[0], fields[1])
		if err != nil {
			return 0, err
		}
		user, site := fields[2], fields[4]

		// Inclusive on both ends so hits exactly at the window boundaries
		// are counted.
		if !t.Before(windowStart) && !t.After(windowEnd) {
			counts, ok := windowUsers[user]
			if !ok {
				counts = make(map[string]int)
				windowUsers[user] = counts
			}
			counts[site]++
		}

		// Map values are copies, so the entry has to be written back to the
		// map for an update to take effect.
		if first, ok := firstMap[user]; !ok || t.Before(first.time) {
			firstMap[user] = hit{site: site, time: t}
		}
		// On equal timestamps the record that appears later in the input is
		// treated as the last hit.
		if last, ok := lastMap[user]; !ok || !t.Before(last.time) {
			lastMap[user] = hit{site: site, time: t}
		}
	}

	fmt.Println("time window users")
	users := make([]string, 0, len(windowUsers))
	for user := range windowUsers {
		users = append(users, user)
	}
	sort.Strings(users)
	for _, user := range users {
		counts := windowUsers[user]
		sites := make([]string, 0, len(counts))
		for site := range counts {
			sites = append(sites, site)
		}
		sort.Strings(sites)
		for _, site := range sites {
			if count := counts[site]; count > 10 {
				fmt.Printf("user %s site %s count %d\n", user, site, count)
			}
		}
	}

	// site -> number of users whose last hit was on that site
	siteLast := make(map[string]int)
	for _, lastHit := range lastMap {
		siteLast[lastHit.site]++
	}
	siteCountList := make([]SiteCount, 0, len(siteLast))
	for site, count := range siteLast {
		siteCountList = append(siteCountList, SiteCount{
			site:  site,
			count: count,
		})
	}
	// Order by site name first, then stable-sort by descending count, so
	// sites with equal counts always come out in the same order.
	sort.Slice(siteCountList, func(i, j int) bool {
		return siteCountList[i].site < siteCountList[j].site
	})
	sort.Stable(sort.Reverse(SiteCounts(siteCountList)))

	fmt.Println("sorted site counts:")
	for _, siteCount := range siteCountList {
		fmt.Printf("site %s count %d\n", siteCount.site, siteCount.count)
	}
	fmt.Println("")

	count := 0
	for user, firstHit := range firstMap {
		if lastHit, ok := lastMap[user]; ok && firstHit.site == lastHit.site {
			count++
		}
	}
	return count, nil
}
// main parses the -i flag, loads the input file, and prints the two
// analytics results: the most popular site among distinct users in country
// BDV, and the number of users whose first and last visits were to the same
// site.
//
// A missing -i flag prints the usage text and exits with status 2; any other
// failure prints the error to standard error and exits with status 1. Errors
// are reported this way instead of via panic so that ordinary user and I/O
// mistakes do not produce a goroutine stack trace.
func main() {
	var inputFile string
	flag.StringVar(&inputFile, "i", "", "input file")
	flag.Parse()
	if inputFile == "" {
		fmt.Fprintln(os.Stderr, "must specify -i input_file")
		flag.Usage()
		os.Exit(2)
	}

	// exitOn terminates the program with a plain error message when err is
	// non-nil.
	exitOn := func(err error) {
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
	}

	input, err := parseFile(inputFile)
	exitOn(err)

	siteID, err := BDVPopularity(input)
	exitOn(err)
	fmt.Printf("most popular site per unique users in country BDV: %s\n", siteID)

	numUsers, err := UsersFirstLastCount(input)
	exitOn(err)
	fmt.Printf("number of users whose first/last visits are to the same site: %d\n", numUsers)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment