Skip to content

Instantly share code, notes, and snippets.

@rohityadavcloud
Last active July 5, 2016 10:43
Show Gist options
  • Save rohityadavcloud/1d5d01c3abe4956b91ece9d45af4e6e5 to your computer and use it in GitHub Desktop.
Save rohityadavcloud/1d5d01c3abe4956b91ece9d45af4e6e5 to your computer and use it in GitHub Desktop.
CloudStack Community Analysis

gitlog.sh - clones the Apache CloudStack repository and exports its git commit history to a CSV file

download-mboxes.sh - downloads the monthly mbox archives of the CloudStack mailing lists (dev, users, users-cn, marketing)

parse-mbox-data.sh - parses the downloaded mbox archives and exports mailing-list data (date, sender, message ID) to one CSV file per list

#!/bin/bash
# download-mboxes.sh - download the monthly mbox archives of the CloudStack
# mailing lists (dev, users, users-cn, marketing) into ./mbox/<list>/.
#
# Covers 2014-01 through 2016-06. Files are fetched with wget, running up to
# 8 downloads in parallel per list.
#
# NOTE: xargs exits non-zero when any wget invocation fails, so under
# `set -e` a single failed download aborts the whole script.
set -euo pipefail

readonly BASE_URL="http://mail-archives.apache.org/mod_mbox"

mkdir -p mbox
cd mbox
ROOT=$PWD

for list in dev users users-cn marketing; do
  cd "$ROOT"
  mkdir -p "$list"
  cd "$list"

  # Each entry is "<year>:<last month to fetch>"; every year starts at month 1.
  urls=()
  for span in 2014:12 2015:12 2016:6; do
    year=${span%%:*}
    last_month=${span##*:}
    for ((m = 1; m <= last_month; m++)); do
      printf -v month '%02d' "$m"
      urls+=("$BASE_URL/cloudstack-$list/$year$month.mbox")
    done
  done

  # Synchronize per list: all downloads for this list finish before the next
  # list starts, while xargs runs up to 8 wget processes in parallel.
  echo "Downloading files: $(printf ' %s' "${urls[@]}")"
  printf '%s\n' "${urls[@]}" | xargs -n 1 -P 8 wget
done
# gitlog.sh - clone the Apache CloudStack repository and dump its commit
# history to gitlog.csv, written in the directory the clone was started from.
#
# One line per commit, fields separated by ';':
#   commit hash; author date; author date as Unix timestamp;
#   author name; author email; committer name; committer email
repo_url='https://git-wip-us.apache.org/repos/asf/cloudstack.git'
log_format='%H;%ad;%at;%aN;%aE;%cN;%cE'

git clone "$repo_url"
# Only run git log (and create the CSV) if entering the clone succeeded.
cd cloudstack \
  && git log --format="$log_format" > ../gitlog.csv
#!/usr/bin/env python
"""Export per-message metadata from downloaded mailing-list mbox archives.

For every list name in ``lists`` the script reads each file found in
``./mbox/<list>/`` and writes ``<list>.csv`` to the current directory. Each
output line is ``date;sender;message-id`` for one message whose ``Date``,
``From`` and ``Message-ID`` headers were all present in its header section.
"""
import io
from os import listdir

mboxLocation = "./mbox/"
lists = ["dev", "marketing", "users", "users-cn"]

# Lower-cased header names that make up one CSV row, in output order.
_FIELDS = ("date", "from", "message-id")


def _sender_address(value):
    """Return the text between the first '<' and the next '>', else `value`."""
    if "<" in value:
        return value.split("<")[1].split(">")[0]
    return value


def _append_row(rows, headers):
    """Append one row built from `headers` to `rows` if no field is missing.

    `headers` is either None (no message in progress) or a dict mapping
    lower-cased header names to their raw values.
    """
    if headers is None or any(name not in headers for name in _FIELDS):
        return
    date, sender, msg_id = (headers[name].strip() for name in _FIELDS)
    rows.append([date, _sender_address(sender), msg_id])


def parse_messages(lines, ml):
    """Extract ``[date, sender, message_id]`` rows from one mbox file.

    Args:
        lines: iterable of text lines (an open file or a list of strings).
        ml: mailing-list name; a message starts at a line beginning with
            ``"From " + ml``.

    Returns:
        A list of three-element lists, in the order the messages appear.

    Only the header section (separator line up to the first blank line) is
    inspected, and header state is reset for every message. This keeps body
    lines such as a quoted ``From: ...`` from being mistaken for headers and
    keeps values of an earlier message from leaking into a later one.
    """
    rows = []
    separator = "From " + ml
    headers = None  # dict while inside a header section, None otherwise
    current = None  # wanted header that a continuation line would extend

    for line in lines:
        if line.startswith(separator):
            # A new message begins; finish the previous one if its header
            # section was never closed by a blank line.
            _append_row(rows, headers)
            headers, current = {}, None
            continue
        if headers is None:
            continue  # message body
        if not line.strip():
            # First blank line ends the header section.
            _append_row(rows, headers)
            headers, current = None, None
            continue
        if line[0] in " \t":
            # Folded header: continuation of the previous header line.
            if current is not None:
                headers[current] += " " + line.strip()
            continue
        name, colon, value = line.partition(":")
        key = name.strip().lower()
        if colon and key in _FIELDS and key not in headers:
            headers[key] = value.strip()
            current = key
        else:
            current = None

    # File ended while still inside a header section.
    _append_row(rows, headers)
    return rows


def main():
    """Write one ``<list>.csv`` per mailing list from ``./mbox/<list>/``."""
    for ml in lists:
        rows = []
        # Sorted so the row order does not depend on directory listing order.
        for name in sorted(listdir(mboxLocation + ml)):
            path = mboxLocation + ml + "/" + name
            # Archives may contain bytes that are not valid UTF-8; replace
            # them instead of failing on decode.
            with io.open(path, "r", encoding="utf-8", errors="replace") as f:
                rows.extend(parse_messages(f, ml))
        with io.open(ml + ".csv", "w", encoding="utf-8") as f:
            for row in rows:
                f.write(u";".join(row) + u"\n")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment