Created
July 23, 2019 22:27
-
-
Save QuantVI/0250395f0b893eb04dd25ed249ed4169 to your computer and use it in GitHub Desktop.
The scribe takes in JSON output from ElasticSearch results. It then dynamically transforms the output into tabular format, writing the data straight to an Excel file using an applicable Excel module.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-

# Module summary; also reused verbatim as the argparse --help description
# in the __main__ section below.
desc1 = ''' The scribe takes in JSON output from ElasticSearch results.
It then dynamically transforms the output into tabular format,
writing the data straight to an Excel file using an applicable
Excel module.'''
# See report version 1 "scribe_report.py" for code comments on the core build
# version 1 code now in excel_printer.py
import json | |
import openpyxl | |
import excel_printer | |
import argparse | |
import os | |
# This next section can be its own script if necessary | |
# begin dont_overwrite ---- | |
def dont_overwrite(this_file): | |
'''Returns the next generic fileame that you can save to without | |
overwriting an existing file of that name. | |
generic name must resemble blah_nn_.ext | |
- blah: you choose this name, like "outPut" | |
- nn: two digit number, like "04" | |
- .ext: any extension, like ".csv" | |
''' | |
import sys | |
import os | |
if os.path.exists(this_file): | |
p = this_file.split('_') | |
p1int = int(p[1]) | |
next_int_str = str(p1int+1) | |
if p1int < 9: | |
next_int_str = ''.join(['0',str(p1int+1)]) | |
next_file = '_'.join([p[0],next_int_str,p[2]]) | |
return dont_overwrite(next_file) | |
else: | |
return this_file | |
# end dont_overwrite section ---- | |
# Default workbook name; deliberately follows the blah_nn_.ext pattern
# that dont_overwrite() expects, so a fresh non-clobbering name can be
# derived from it.
output_filename = 'out_00_.xlsx'
# # # out_to = dont_overwrite(output_filename)
def scribe_reports(path='', existing='n', xlfile=output_filename):
    '''Convert every .json file under ``path`` into sheets of one Excel file.

    path     -- directory (relative to the cwd) holding the JSON inputs
    existing -- 'y'/'Y' to append to an already-existing workbook ``xlfile``;
                anything else creates a new workbook first
    xlfile   -- output workbook name; when left at the default, a fresh
                non-clobbering name is derived via dont_overwrite()

    Note: new_sheet is always passed to printInExcel — what actually varies
    is whether a new *file* is created.  Only supply ``xlfile`` yourself if
    that file already exists.
    '''
    def _get_files(path_to):
        '''Return the names of all .json files in path_to (joined to cwd).'''
        # Bug fix: the original closed over the outer ``path`` and ignored
        # its own parameter.
        files_loc = '/'.join((os.getcwd(), path_to))
        return [name for name in os.listdir(files_loc) if '.json' in name]

    def _read_json_from_file(filename):
        '''Load JSON from filename; on invalid JSON return a marker dict.'''
        print('reading from: %s' % filename)
        op = open(filename, 'rb')
        try:
            rd = op.read()
        finally:
            op.close()
        try:
            return json.loads(rd)
        except ValueError:
            return {filename: 'was not valid json'}

    # Bug fix: the original tested ``existing in 'yY'``, which is also True
    # for the empty string (substring membership).
    if existing in ('y', 'Y'):
        out_to = xlfile
    else:
        # Only derive a fresh name when the caller kept the default name.
        out_to = dont_overwrite(xlfile) if xlfile == output_filename else xlfile
        created_wb = openpyxl.Workbook()
        created_wb.save(filename=out_to)

    lof = _get_files(path)
    print(lof)
    for a_file in lof:
        # Join with the input directory so a non-cwd ``path`` works
        # (the original opened bare names relative to the cwd).
        jsondata = _read_json_from_file(os.path.join(path, a_file))
        struc = excel_printer.detectStructure(jsondata)
        handled = excel_printer.handleStructuredData(jsondata, struc)
        excel_printer.printInExcel(jsondata, handled, out_to, 'new_sheet')
# NOTE(review): the triple-quoted block below is scratch/debug code kept for
# reference.  As a bare, unassigned string literal it never executes; the
# filenames it opens are presumably local test fixtures — safe to delete
# once no longer needed.
'''
# level_1: type B, one 1st level agg with multiple dict items in the bucket
qqq = open('_ohhs-status-over-year-test')
www = qqq.read()
qqq.close()
eee = json.loads(www)
# level_2
aaa = open('_emma_vi_res-per-yr-with-status-code-test-2')
sss = aaa.read()
aaa.close()
ddd = json.loads(sss)
# multi_s
zzz = open('_test_msearch','rb')
xxx = zzz.read()
zzz.close()
ccc = json.loads(xxx)
# level_1 : type A, multiple 1st level aggs
ttt = open('_ohhs-codes-test','rb')
ggg = ttt.read()
ttt.close()
bbb = json.loads(ggg)
'''
if __name__ == '__main__':
    # CLI flag table: short key -> [long option, help text].
    cli_opts = {
        'p': ['--path', 'location of the json input files'],
        'x': ['--existing', 'y or n if the output xlsx already exists'],
        'f': ['--file', 'if existing, location of the excel file to write to'],
    }

    def parse_args():
        '''Build the argument parser from cli_opts and parse sys.argv.'''
        parser = argparse.ArgumentParser(description=desc1)
        parser.add_argument('-p', cli_opts['p'][0], help=cli_opts['p'][1],
                            default='')
        parser.add_argument('-x', cli_opts['x'][0], help=cli_opts['x'][1],
                            default='n')
        parser.add_argument('-f', cli_opts['f'][0], help=cli_opts['f'][1],
                            default=output_filename)
        return parser.parse_args()

    args = parse_args()
    print(args)
    scribe_reports(args.path, args.existing, args.file)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.
Scribe Report is a custom file I wrote at work to help download JSON data from ElasticSearch and convert these results into single or multi-sheet Excel files that colleagues in other departments could read and understand. This was part of a "data audit" process, to check the data quality of imported data from newly integrated clients.
Scribe report relies heavily on my other custom script "Excel Printer". The Excel Printer looks at the generalized structure of the JSON results (e.g. hits or aggregates with 1,2, or 3 nested levels) and converts the results into the appropriate Excel table format.
Looking at the "general" structure of the ElasticSearch JSON results meant I did not need to write a specific parser for each different ElasticSearch data query. (The set of ElasticSearch queries was originally converted from SQL queries used for the same process.)