# briantjacobs/combineHorizons.py

## combineHorizons.py
from jsonmerge import merge
import itertools
import os
import sys
import subprocess
import processHorizons
import simplejson as json
import msgpack
import dateutil.parser
from datetime import datetime

# Root of the directory tree that process() walks for input files.
rootdir = "data/cassini"

# strptime pattern used for every record's "date" value and for the
# "from"/"to" bounds of the range config.
dformat = "%Y-%m-%d %H:%M:%S"

# Divisor applied to a range's configured interval to get its sampling
# stride (see main()). NOTE(review): presumably the spacing of the raw
# samples, in the same unit as the configured intervals -- confirm.
dataInterval = 10

# filter data to only after date of interest
def dateFilter(d):
    """Return True when record ``d`` is dated strictly after 2004-06-29 00:00:00.

    ``d`` must be a mapping whose "date" value is a string in ``dformat``;
    ``datetime.strptime`` raises ValueError for anything else.
    """
    cutoff = datetime(2004, 6, 29)
    stamp = datetime.strptime(d["date"], dformat)
    return cutoff < stamp

#filter data between arbitrary date range
def dateRangeFilter(dateFrom, dateTo):
    """Build a predicate that keeps records dated within [dateFrom, dateTo).

    Args:
        dateFrom: datetime lower bound (inclusive).
        dateTo: datetime upper bound (exclusive).

    Returns:
        A one-argument function taking a record (a mapping whose "date"
        value is a string in ``dformat``) and returning True when that date
        lies in the half-open range.
    """
    def myfilter(d):
        # Parse once per record; strptime is the expensive part of the test.
        stamp = datetime.strptime(d["date"], dformat)
        return dateFrom <= stamp < dateTo
    return myfilter

# extract data from folder of text files
def process():
    """Yield the parsed contents of every file found under ``rootdir``.

    Each file path is printed and handed to ``processHorizons.main``; whatever
    that call returns is yielded as-is (main() chains these results into one
    flat list, so each is expected to be an iterable of records).

    Directories and file names are visited in sorted order. ``os.walk``
    otherwise reports them in the arbitrary order the filesystem lists them,
    and main() depends on record order (it reads the last record's date and
    samples every Nth record), so an unsorted walk could give different
    output on different machines.
    """
    for subdir, dirs, files in os.walk(rootdir):
        # Sorting in place controls the order os.walk descends into subdirs.
        dirs.sort()
        for filename in sorted(files):
            path = os.path.join(subdir, filename)
            print("read " + path)
            yield processHorizons.main(path)


def main(size):
    """Group all parsed records by configured date range and write them out.

    Reads the range config from ``cassiniSlideRanges.json``, attaches to each
    range the records whose "date" falls inside it (sampled down according to
    the range's interval), and writes the enriched config as both
    ``cassini_combined<suffix>.json`` and ``cassini_combined<suffix>.pack``.

    Args:
        size: "mobile" selects each range's "intervalMobile" value and the
            "_sm" output suffix; any other value selects "interval" and no
            suffix.

    Raises:
        IndexError: if the config holds at least one range but no records
            were read from the input files.
        KeyError: if a range lacks the selected interval key, or the first
            range lacks "from".
    """
    # do different things if mobile or desktop flag is provided
    if size == "mobile":
        print("Process Mobile")
        intervalKey = "intervalMobile"
        suffix = "_sm"
    else:
        print("Process Big")
        intervalKey = "interval"
        suffix = ""

    # turn all input files into a single list of data
    jsonCombined = list(itertools.chain.from_iterable(process()))
    print("Orig size: " + str(len(jsonCombined)))

    # read the range config; the context manager closes the file even when
    # parsing fails
    with open('src/ngm-assets/data/cassiniSlideRanges.json') as jsonRanges:
        jsonRangesData = json.loads(jsonRanges.read())

    lastIndex = len(jsonRangesData) - 1
    for i, d in enumerate(jsonRangesData):
        # every range after the first starts where the previous one ended
        if i != 0:
            d["from"] = jsonRangesData[i - 1]["to"]

        # the final range always runs up to the date of the last record read
        # NOTE(review): dateRangeFilter excludes its upper bound, so that last
        # record itself never lands in any range -- confirm this is intended.
        if i == lastIndex:
            d["to"] = jsonCombined[-1]["date"]

        inRange = dateRangeFilter(
            datetime.strptime(d["from"], dformat),
            datetime.strptime(d["to"], dformat))
        filteredData = [record for record in jsonCombined if inRange(record)]

        # Keep every Nth record. Floor division keeps the stride an integer
        # under true division as well, and the clamp keeps every record when
        # the configured interval is smaller than dataInterval (a stride of 0
        # is an error).
        step = max(1, d[intervalKey] // dataInterval)
        reducedData = filteredData[::step]
        d["data"] = reducedData

        print(d["from"] + " to " + d["to"] + ":")
        print("Date filtered: " + str(len(filteredData)))
        print("Interval filtered:" + str(len(reducedData)))

    outputBase = 'src/ngm-assets/data/cassini_combined' + suffix

    # write a json
    print("write json")
    out = json.dumps(jsonRangesData)
    with open(outputBase + '.json', 'w') as jsonOutput:
        jsonOutput.write(out)

    # compress to msgpack; packb returns raw bytes, so the file must be
    # opened in binary mode
    print("write msgpack")
    packOut = msgpack.packb(jsonRangesData)
    with open(outputBase + '.pack', 'wb') as msgPackOutput:
        msgPackOutput.write(packOut)

## is this running from commandline?
if __name__ == "__main__":
    # main() treats anything other than "mobile" as the desktop build, so a
    # missing argument falls back to desktop instead of raising IndexError.
    main(sys.argv[1] if len(sys.argv) > 1 else "")