Last active
October 3, 2017 17:22
-
-
Save briantjacobs/27f4890bd172d134197faaecfd893476 to your computer and use it in GitHub Desktop.
Script to process a folder of ephemeris data and break it into date ranges
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from jsonmerge import merge | |
import itertools | |
import os | |
import sys | |
import subprocess | |
import processHorizons | |
import simplejson as json | |
import msgpack | |
import dateutil.parser | |
from datetime import datetime | |
# Root folder that is walked recursively for input files.
rootdir = "data/cassini"

# strptime pattern used to parse every date string handled in this script.
dformat = "%Y-%m-%d %H:%M:%S"

# Divisor applied to a range's configured interval to get its sampling stride.
# NOTE(review): presumably the spacing of the source samples, in the same
# units as the intervals in the range config -- confirm against the data.
dataInterval = 10
def dateFilter(d):
    """Return True when record ``d`` is dated strictly after 2004-06-29.

    ``d`` must be a mapping whose ``"date"`` value is a string in the
    module-level ``dformat`` pattern.
    """
    cutoff = datetime(2004, 6, 29)
    stamp = datetime.strptime(d["date"], dformat)
    return stamp > cutoff
def dateRangeFilter(dateFrom, dateTo):
    """Build a predicate that keeps records dated within a half-open range.

    Args:
        dateFrom: inclusive lower bound (``datetime``).
        dateTo: exclusive upper bound (``datetime``).

    Returns:
        A one-argument function. Given a record ``d`` whose ``"date"`` value
        is a string in the module-level ``dformat`` pattern, it returns True
        when ``dateFrom <= date < dateTo``.
    """
    def myfilter(d):
        # Parse once: strptime is comparatively slow and this predicate runs
        # for every record of every range.
        stamp = datetime.strptime(d["date"], dformat)
        return dateFrom <= stamp < dateTo

    return myfilter
def process():
    """Yield the parsed contents of every file found under ``rootdir``.

    Walks ``rootdir`` recursively and passes each file path to
    ``processHorizons.main``, yielding whatever that call returns. ``main``
    chains the yielded values together, so each one must be iterable.

    Directories and files are visited in sorted name order. ``os.walk`` gives
    no ordering guarantee of its own, and ``main`` depends on the order of the
    combined records (it samples them by position and reads the date of the
    final record), so the traversal has to be deterministic.
    """
    for subdir, dirs, files in os.walk(rootdir):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for filename in sorted(files):
            path = os.path.join(subdir, filename)
            print("read " + path)
            yield processHorizons.main(path)
def main(size):
    """Build the combined dataset and write it out as JSON and msgpack.

    Collects every record produced by ``process()``, loads the slide range
    config, attaches to each range the records dated inside it (sampled down
    by that range's interval), then writes the enriched config to
    ``cassini_combined<suffix>.json`` and ``cassini_combined<suffix>.pack``.

    Args:
        size: ``"mobile"`` selects the ``intervalMobile`` config key and the
            ``_sm`` output suffix; any other value selects ``interval`` and
            no suffix.

    Raises:
        IndexError: if the config has entries but no records were read.
        KeyError: if a range lacks the selected interval key, the first range
            lacks ``"from"``, or a non-final range lacks ``"to"``.
    """
    # do different things if mobile or desktop flag is provided
    if size == "mobile":
        print("Process Mobile")
        intervalKey = "intervalMobile"
        suffix = "_sm"
    else:
        print("Process Big")
        intervalKey = "interval"
        suffix = ""

    # turn all input files into a single list of data
    jsonCombined = list(itertools.chain.from_iterable(process()))
    print("Orig size: " + str(len(jsonCombined)))

    # read the range config; the context manager closes it even on error
    with open('src/ngm-assets/data/cassiniSlideRanges.json', "r") as jsonRanges:
        jsonRangesData = json.loads(jsonRanges.read())

    # restructure the data into date range groups
    lastIndex = len(jsonRangesData) - 1
    for i, d in enumerate(jsonRangesData):
        # every range after the first starts where the previous one ended
        if i != 0:
            d["from"] = jsonRangesData[i - 1]["to"]
        # the last range ends at the date of the final record
        if i == lastIndex:
            d["to"] = jsonCombined[-1]["date"]

        # create a filter function limited to this range
        filterFunc = dateRangeFilter(
            datetime.strptime(d["from"], dformat),
            datetime.strptime(d["to"], dformat),
        )
        filteredData = [row for row in jsonCombined if filterFunc(row)]

        # Sample according to the time interval. Floor division keeps the
        # stride an integer on both Python 2 and 3; clamping to 1 keeps every
        # record when the configured interval is finer than dataInterval,
        # instead of failing on a zero stride.
        step = max(1, int(d[intervalKey] // dataInterval))
        reducedData = filteredData[::step]
        d["data"] = reducedData

        print(d["from"] + " to " + d["to"] + ":")
        print("Date filtered: " + str(len(filteredData)))
        print("Interval filtered:" + str(len(reducedData)))

    outputBase = 'src/ngm-assets/data/cassini_combined' + suffix

    # write a json
    print("write json")
    out = json.dumps(jsonRangesData)
    with open(outputBase + '.json', 'w') as jsonOutput:
        jsonOutput.write(out)

    # compress to msgpack; packb returns bytes, so the file must be opened in
    # binary mode, and the context manager guarantees it is flushed and closed
    print("write msgpack")
    packOut = msgpack.packb(jsonRangesData)
    with open(outputBase + '.pack', 'wb') as msgPackOutput:
        msgPackOutput.write(packOut)
## is this running from commandline?
if __name__ == "__main__":
    # Fall back to the desktop build when no size flag is given rather than
    # failing with an IndexError; main() treats any value other than "mobile"
    # as the desktop case.
    main(sys.argv[1] if len(sys.argv) > 1 else "desktop")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment