Skip to content

Instantly share code, notes, and snippets.

@pudquick
Last active September 18, 2015 16:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save pudquick/ffdbdb52ae6960ca8e55 to your computer and use it in GitHub Desktop.
Playing with parsing Server caching service logs
import tempfile, os.path, shutil, glob, os, subprocess, re
# When True, log() echoes progress/debug information to stdout.
debug = True

# Overall flow: clone the logs out of /tmp somewhere safe, then bunzip and
# combine them for a single parsing pass.

def log(s):
    """Print *s* to stdout when the module-level ``debug`` flag is set."""
    # Reading a module global needs no ``global`` statement; the original's
    # ``global debug`` declaration was redundant and has been dropped.
    # print() works on both Python 2 and 3 for a single argument.
    if debug:
        print(s)

# Source directory of the caching-service logs, and the single day we report on.
logPath = "/Users/mike/Desktop/CacheLogs"
targetDate = "2015-04-13"

# Are we sure the logs aren't in decimal bytes? Oh well, binary it is..
sizeMultiplier = {'GB': 1073741824.,
                  'MB': 1048576.,
                  'bytes': 1.}
# Make a temporary directory to work with; per the python docs:
# "The directory is readable, writable, and searchable only by the creating user ID."
tmpDir = tempfile.mkdtemp()
log("tmpDir: %s" % tmpDir)

# Clone the contents of logPath into a 'logs' subdirectory of the temp dir.
# (shutil.copytree requires that the destination not already exist.)
tmpLogs = os.path.join(tmpDir, 'logs')
log("tmpLogs: %s" % tmpLogs)
shutil.copytree(logPath, tmpLogs)

# Expand any .bz2 files in the directory (Server 4.1+ compresses rotated logs).
# The glob below uses absolute paths, so the original os.chdir(tmpLogs) was
# unnecessary; it has been removed to avoid mutating the process-wide cwd.
for bzLog in glob.glob(os.path.join(tmpLogs, '*.bz2')):
    # check_call raises CalledProcessError on a non-zero exit status, so the
    # unused ``result =`` binding from the original has been dropped.
    subprocess.check_call(["bzip2", "-d", bzLog])

# Combine all .log files into one temp file that is removed when python exits.
rawLog = tempfile.TemporaryFile()
for anyLog in glob.glob(os.path.join(tmpLogs, '*.log')):
    with open(anyLog, 'rb') as f:
        shutil.copyfileobj(f, rawLog)

# Rewind to the beginning of the newly concatenated log before reading it.
rawLog.seek(0)

# Everything we need is now in the self-disposing file, so reap the temp tree.
shutil.rmtree(tmpDir)
# Now the real work can begin
def process_log(lines):
    """Scan *lines* once and collect transfer/client stats for targetDate.

    Each line is expected to look like "YYYY-MM-DD HH:MM:SS message...".
    Data collected (each item is also echoed through log() for debugging):
      sizeLog     -- lists of byte counts from 'Since server start:' lines
      IPLog       -- client IPs from 'Request from ' lines
      OSLog       -- (osFamily, osVersion) tuples per request
      iOSModelLog -- (model, modelNumber) tuples for iOS clients

    Returns the tuple (sizeLog, IPLog, OSLog, iOSModelLog).  The original
    implementation returned None, so existing callers that discard the
    result are unaffected.

    Raises Exception on a line that matches the target date but cannot be
    parsed (after printing the offending message).
    """
    sizeLog = []
    IPLog = []
    OSLog = []
    iOSModelLog = []
    # ...etc. -- more collections can be added here as parsing grows.
    for x in lines:
        # If there aren't at least 3 pieces somehow, fill in with blanks.
        datestr, timestr, logmsg = (x.split(' ', 2) + ['', '', ''])[:3]
        if datestr != targetDate:
            # Only do work if the line is on the date we care about.
            continue
        try:
            if 'Since server start:' in logmsg:
                # Capture the size values, in order, converted to bytes.
                # A list comprehension instead of map() so this is a real
                # list on Python 3 as well (map() there is a lazy iterator,
                # which would break both the repr and later slicing/stats).
                sizes = [int(float(num) * sizeMultiplier[unit])
                         for num, unit in re.findall(
                             r'(?<= |\()([0-9.]{1,}) (GB|MB|bytes)(?= )',
                             logmsg)]
                sizeLog.append(sizes)
                log('transferred:' + repr(sizes))
            elif 'Request from ' in logmsg:
                # It's a request, log the IP - Apple should only be sending
                # IPs to our server if they're supposed to be served by it -
                # why filter? We still served it.
                # Found interesting item that had a null IP:
                if 'Request from [(null)]' not in logmsg:
                    ip = re.match(r'[^ ]+ Request from (([0-9]+\.?){4})',
                                  logmsg).group(1)
                    IPLog.append(ip)
                    log('ip:' + ip)
                    # Also need to log the OS.
                    osInfo = re.match(
                        r'.+? ((iOS|Darwin|OS X)[/ ](([0-9]+\.?){1,}))',
                        logmsg)
                    osFamily = osInfo.group(2)
                    osVersion = osInfo.group(3)
                    OSLog.append((osFamily, osVersion))
                    log('os:%s %s' % (osFamily, osVersion))
                    # If it's iOS, also log the product family and model.
                    if osFamily == 'iOS':
                        iOSInfo = re.match(
                            r'.+? model/([^ ]+?)([0-9]+,?[0-9]?)', logmsg)
                        iOSModel = iOSInfo.group(1)
                        iOSVersion = iOSInfo.group(2)
                        iOSModelLog.append((iOSModel, iOSVersion))
                        log('iOS:%s%s' % (iOSModel, iOSVersion))
            # Do the stage VI stuff here - I need some sleep
        except Exception:
            # Narrowed from the original bare ``except:`` so that
            # KeyboardInterrupt/SystemExit are not swallowed; a matching-date
            # line that fails to parse is still surfaced loudly.
            print(logmsg)
            raise Exception("Funky line - check it out")
    return (sizeLog, IPLog, OSLog, iOSModelLog)
# Process the log: feed every line of the concatenated temp file to the
# parser.  readlines() materializes the whole file in memory, which is
# acceptable for caching-service logs of this size.
process_log(rawLog.readlines())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment