alpiepho/filterCsvDataOnly.py

## filterCsvDataOnly.py
#!/usr/bin/python

# NOTE: my first significant python script - AJP

import codecs
import csv
from datetime import datetime
import getopt
import os
import sys

def processTemplateStart(htmFp, tplFilename):
    """Copy the head of the template into the output.

    Lines of the template are written to ``htmFp`` unchanged until the first
    line containing ``class="dataHeader`` is reached; that line and everything
    after it are left out.

    Args:
        htmFp: Writable file-like object that accepts ``str``.
        tplFilename: Path of the template HTM file.
    """
    # Text mode: the marker below is a str, and searching the bytes lines of
    # a binary handle for a str raises TypeError on Python 3.
    with open(tplFilename, 'r') as f:
        for line in f:
            if 'class="dataHeader' in line:
                break
            htmFp.write(line)

def processTemplateEnd(htmFp, tplFilename):
    """Copy the tail of the template into the output.

    Nothing is written until a line containing ``class="dataHeader`` has been
    seen and a later line contains ``class="sectionHeader``.  That
    section-header line and every line after it are written to ``htmFp``.
    A section header that appears before the data header is ignored.

    Args:
        htmFp: Writable file-like object that accepts ``str``.
        tplFilename: Path of the template HTM file.
    """
    # Text mode: the markers below are str, and searching the bytes lines of
    # a binary handle for a str raises TypeError on Python 3.
    with open(tplFilename, 'r') as f:
        dataStart = False
        dataEnd = False
        for line in f:
            if 'class="dataHeader' in line:
                dataStart = True
            elif dataStart and 'class="sectionHeader' in line:
                dataEnd = True
            if dataEnd:
                htmFp.write(line)

def _reformatTimestamp(text):
    """Re-emit a '%m/%d/%y %H:%M:%S.%f' timestamp with unpadded month and day.

    The fractional part is cut from microseconds to milliseconds.  Raises
    ValueError (from strptime) when ``text`` is not in that format.
    """
    dto = datetime.strptime(text, '%m/%d/%y %H:%M:%S.%f')
    # TODO: shift dto by the pass offset.  That needs datetime.timedelta, and
    # only the datetime class is imported by this file.
    # Month and day are formatted by hand because the '%-m' / '%-d' strftime
    # directives are platform extensions, not portable format codes.
    return '%d/%d/%s' % (dto.month, dto.day, dto.strftime('%y %H:%M:%S.%f')[:-3])


def processCsv(htmFp, csvFp, delimEnum, dupColumns, rows, dupRows, dateOffset):
    """Write one pass over the CSV to ``htmFp`` as HTML table rows.

    Rows with fewer than two cells are ignored.  A row containing
    "Date Time" is the header: it fixes the expected column count and is
    written only on the first pass; a second header row ends the pass.
    Every other row is written as a data row.

    Args:
        htmFp: Writable file-like object that accepts ``str``.
        csvFp: Iterable of text lines handed to ``csv.reader``.
        delimEnum: 1 selects tab as the delimiter, anything else comma.
        dupColumns: Number of times the columns after the first appear in
            each row; values of 1 or less leave the row unchanged.
        rows: Zero-based index of the current pass; the header row is only
            emitted when this is 0.
        dupRows: Total number of passes; on the last one
            (``rows + 1 == dupRows``) a closing row with one empty cell is
            appended.
        dateOffset: When >= 1 the first cell of each data row is parsed and
            re-formatted (the offset itself is not applied yet, see TODO in
            ``_reformatTimestamp``).

    Returns:
        ``''`` when no data row was written, otherwise a one-element list
        holding the first cell of the last data row as read from the CSV.
    """
    lastDate = ''
    # Bound up front: a data row may arrive before any "Date Time" header,
    # and the padding check below must not hit an unbound name.
    totalColumns = 0
    dataStarted = False
    delim = '\t' if delimEnum == 1 else ','

    for row in csv.reader(csvFp, delimiter=delim):
        if len(row) <= 1:
            continue

        # duplicate columns: everything after the first cell is repeated
        row = row + row[1:] * (dupColumns - 1)
        line = ''.join(row)

        if "Date Time" in line:
            # use flag to avoid duplicate sets of data
            if dataStarted:
                break
            dataStarted = True

            # count for padding out empty "Marked" column
            totalColumns = len(row)

            # duplicate rows loop calls this function, only show header lines once
            if rows == 0:
                htmFp.write("<tr class=\"dataHeader\">\n")
                for el in row:
                    if "Date Time" in el:
                        htmFp.write("<td isi-data-column-header=\"datetime\">Date Time</td>\n")
                    elif "Marked" in el:
                        htmFp.write("<td isi-data-column-header=\"marked\" isi-marked>Marked</td>\n")
                    else:
                        htmFp.write("<td isi-data-column-header=\"parameter\" isi-device-id=\"465769\" isi-sensor-id=\"465769\" isi-sensor-type=\"2\" isi-parameter-type=\"2\" isi-unit-type=\"2\">" + el + "</td>\n")
                htmFp.write("</tr>\n")
            continue

        # NOTE(review): cell text is written into the HTML unescaped.
        isiMarked = " isi-marked" if "Marked" in line else ""
        htmFp.write("<tr class=\"data\">\n")
        for index, el in enumerate(row):
            if index == 0:
                if dateOffset >= 1:
                    el = _reformatTimestamp(el)
                htmFp.write("<td isi-data-column isi-timestamp=\"123456\"" + isiMarked + " class=\"date\">" + el + "</td>\n")
            else:
                htmFp.write("<td isi-data-column" + isiMarked + ">" + el + "</td>\n")
        # a short row gets a single empty cell, whatever the shortfall
        if len(row) < totalColumns:
            htmFp.write("<td />\n")
        htmFp.write("</tr>\n")
        lastDate = row[0:1]

    if rows + 1 == dupRows:
        htmFp.write("<tr>\n")
        htmFp.write("    <td />\n")
        htmFp.write("</tr>\n")
    return lastDate

def Usage():
    """Print the command-line help for this script to stdout."""
    print("Usage: %s -i <file> -o <file> -t <file> [-d 0|1] [-c <num>] [-r <num>]" % sys.argv[0])
    print("  -i <file>      Input    CSV file (use utf16toutf8.pv if needed)")
    print("  -o <file>      Output   HTM file")
    print("  -t <file>      Template HTM file, data will be replaced with CSV data")
    # The backslash is doubled so the help shows the two characters \t
    # instead of printing a real tab.
    print("  -d 0|1         (optional) 0=','(default)  1='\\t'")
    print("  -c <num>       (optional) Duplicate columns N times")
    print("  -r <num>       (optional) Duplicate rows    N times")
    print("  -h             Show this help and exit")
    print("TODO: adjust time for -r")
    print("TODO: parse CSV meta data instead of fixed values from template")
# TODO parse CSV meta data instead of fixed values from template

# ---------------------------------------------------------------------------
# Script body: parse the command line, then write
#   template head + CSV rows (repeated dupRows times) + template tail
# to the output file.
# ---------------------------------------------------------------------------
csvFilename = ''
htmFilename = ''
tplFilename = ''
dupColumns  = 1
dupRows     = 1
delimEnum   = 0
try:
    # process command arguments
    ouropts, args = getopt.getopt(sys.argv[1:], "i:o:t:d:c:r:h")
    for o, a in ouropts:
        if   o == '-i':
            csvFilename = a
        elif o == '-o':
            htmFilename = a
        elif o == '-t':
            tplFilename = a
        elif o == '-d':
            delimEnum   = int(a)
        elif o == '-c':
            dupColumns  = int(a)
        elif o == '-r':
            dupRows     = int(a)
        elif o == '-h':
            Usage()
            sys.exit(0)
except getopt.GetoptError as e:
    print(str(e))
    Usage()
    sys.exit(2)
except ValueError:
    # int() rejected the value given to -d, -c or -r; o and a still hold
    # the offending option and its argument.
    print("option %s expects an integer, got '%s'" % (o, a))
    Usage()
    sys.exit(2)

# All three file names are mandatory.  A missing one is a usage error, so the
# exit status is non-zero.
for value, flag, what in ((csvFilename, '-i', 'input CSV'),
                          (htmFilename, '-o', 'output HTM'),
                          (tplFilename, '-t', 'template HTM')):
    if not value:
        print("please use %s for %s file" % (flag, what))
        Usage()
        sys.exit(2)

# Text mode on both handles: on Python 3 csv.reader needs str lines and the
# row writers emit str, neither of which works on a binary handle.
with open(htmFilename, 'w') as htmFp, open(csvFilename, 'r') as csvFp:
    processTemplateStart(htmFp, tplFilename)

    # handle duplicate rows (TODO: should adjust date)
    rows = 0
    dateOffset = 0
    while rows < dupRows:
        lastDate = processCsv(htmFp, csvFp, delimEnum, dupColumns, rows, dupRows, dateOffset)
        dateOffset += 10000

        # rewind so the next pass reads the CSV from the top again
        csvFp.seek(0)
        rows += 1

    processTemplateEnd(htmFp, tplFilename)
	#!/usr/bin/python

	# NOTE: my first significant python script - AJP

	import codecs
	import csv
	from datetime import datetime
	import getopt
	import os
	import sys

	def processTemplateStart(htmFp, tplFilename):
	    """Copy the head of the template into the output.

	    Lines of the template are written to ``htmFp`` unchanged until the
	    first line containing ``class="dataHeader`` is reached; that line and
	    everything after it are left out.

	    Args:
	        htmFp: Writable file-like object that accepts ``str``.
	        tplFilename: Path of the template HTM file.
	    """
	    # Text mode: the marker below is a str, and searching the bytes lines
	    # of a binary handle for a str raises TypeError on Python 3.
	    with open(tplFilename, 'r') as f:
	        for line in f:
	            if 'class="dataHeader' in line:
	                break
	            htmFp.write(line)

	def processTemplateEnd(htmFp, tplFilename):
	    """Copy the tail of the template into the output.

	    Nothing is written until a line containing ``class="dataHeader`` has
	    been seen and a later line contains ``class="sectionHeader``.  That
	    section-header line and every line after it are written to ``htmFp``.
	    A section header that appears before the data header is ignored.

	    Args:
	        htmFp: Writable file-like object that accepts ``str``.
	        tplFilename: Path of the template HTM file.
	    """
	    # Text mode: the markers below are str, and searching the bytes lines
	    # of a binary handle for a str raises TypeError on Python 3.
	    with open(tplFilename, 'r') as f:
	        dataStart = False
	        dataEnd = False
	        for line in f:
	            if 'class="dataHeader' in line:
	                dataStart = True
	            elif dataStart and 'class="sectionHeader' in line:
	                dataEnd = True
	            if dataEnd:
	                htmFp.write(line)

	def _reformatTimestamp(text):
	    """Re-emit a '%m/%d/%y %H:%M:%S.%f' timestamp with unpadded month and day.

	    The fractional part is cut from microseconds to milliseconds.  Raises
	    ValueError (from strptime) when ``text`` is not in that format.
	    """
	    dto = datetime.strptime(text, '%m/%d/%y %H:%M:%S.%f')
	    # TODO: shift dto by the pass offset.  That needs datetime.timedelta,
	    # and only the datetime class is imported by this file.
	    # Month and day are formatted by hand because the '%-m' / '%-d'
	    # strftime directives are platform extensions, not portable codes.
	    return '%d/%d/%s' % (dto.month, dto.day, dto.strftime('%y %H:%M:%S.%f')[:-3])


	def processCsv(htmFp, csvFp, delimEnum, dupColumns, rows, dupRows, dateOffset):
	    """Write one pass over the CSV to ``htmFp`` as HTML table rows.

	    Rows with fewer than two cells are ignored.  A row containing
	    "Date Time" is the header: it fixes the expected column count and is
	    written only on the first pass; a second header row ends the pass.
	    Every other row is written as a data row.

	    Args:
	        htmFp: Writable file-like object that accepts ``str``.
	        csvFp: Iterable of text lines handed to ``csv.reader``.
	        delimEnum: 1 selects tab as the delimiter, anything else comma.
	        dupColumns: Number of times the columns after the first appear
	            in each row; values of 1 or less leave the row unchanged.
	        rows: Zero-based index of the current pass; the header row is
	            only emitted when this is 0.
	        dupRows: Total number of passes; on the last one
	            (``rows + 1 == dupRows``) a closing row with one empty cell
	            is appended.
	        dateOffset: When >= 1 the first cell of each data row is parsed
	            and re-formatted (the offset itself is not applied yet, see
	            TODO in ``_reformatTimestamp``).

	    Returns:
	        ``''`` when no data row was written, otherwise a one-element list
	        holding the first cell of the last data row as read from the CSV.
	    """
	    lastDate = ''
	    # Bound up front: a data row may arrive before any "Date Time" header,
	    # and the padding check below must not hit an unbound name.
	    totalColumns = 0
	    dataStarted = False
	    delim = '\t' if delimEnum == 1 else ','

	    for row in csv.reader(csvFp, delimiter=delim):
	        if len(row) <= 1:
	            continue

	        # duplicate columns: everything after the first cell is repeated
	        row = row + row[1:] * (dupColumns - 1)
	        line = ''.join(row)

	        if "Date Time" in line:
	            # use flag to avoid duplicate sets of data
	            if dataStarted:
	                break
	            dataStarted = True

	            # count for padding out empty "Marked" column
	            totalColumns = len(row)

	            # duplicate rows loop calls this function, only show header lines once
	            if rows == 0:
	                htmFp.write("<tr class=\"dataHeader\">\n")
	                for el in row:
	                    if "Date Time" in el:
	                        htmFp.write("<td isi-data-column-header=\"datetime\">Date Time</td>\n")
	                    elif "Marked" in el:
	                        htmFp.write("<td isi-data-column-header=\"marked\" isi-marked>Marked</td>\n")
	                    else:
	                        htmFp.write("<td isi-data-column-header=\"parameter\" isi-device-id=\"465769\" isi-sensor-id=\"465769\" isi-sensor-type=\"2\" isi-parameter-type=\"2\" isi-unit-type=\"2\">" + el + "</td>\n")
	                htmFp.write("</tr>\n")
	            continue

	        # NOTE(review): cell text is written into the HTML unescaped.
	        isiMarked = " isi-marked" if "Marked" in line else ""
	        htmFp.write("<tr class=\"data\">\n")
	        for index, el in enumerate(row):
	            if index == 0:
	                if dateOffset >= 1:
	                    el = _reformatTimestamp(el)
	                htmFp.write("<td isi-data-column isi-timestamp=\"123456\"" + isiMarked + " class=\"date\">" + el + "</td>\n")
	            else:
	                htmFp.write("<td isi-data-column" + isiMarked + ">" + el + "</td>\n")
	        # a short row gets a single empty cell, whatever the shortfall
	        if len(row) < totalColumns:
	            htmFp.write("<td />\n")
	        htmFp.write("</tr>\n")
	        lastDate = row[0:1]

	    if rows + 1 == dupRows:
	        htmFp.write("<tr>\n")
	        htmFp.write(" <td />\n")
	        htmFp.write("</tr>\n")
	    return lastDate

	def Usage():
	    """Print the command-line help for this script to stdout."""
	    print("Usage: %s -i <file> -o <file> -t <file> [-d 0|1] [-c <num>] [-r <num>]" % sys.argv[0])
	    print(" -i <file> Input CSV file (use utf16toutf8.pv if needed)")
	    print(" -o <file> Output HTM file")
	    print(" -t <file> Template HTM file, data will be replaced with CSV data")
	    # The backslash is doubled so the help shows the two characters \t
	    # instead of printing a real tab.
	    print(" -d 0|1 (optional) 0=','(default) 1='\\t'")
	    print(" -c <num> (optional) Duplicate columns N times")
	    print(" -r <num> (optional) Duplicate rows N times")
	    print(" -h Show this help and exit")
	    print("TODO: adjust time for -r")
	    print("TODO: parse CSV meta data instead of fixed values from template")
	# TODO parse CSV meta data instead of fixed values from template

	# -----------------------------------------------------------------------
	# Script body: parse the command line, then write
	#   template head + CSV rows (repeated dupRows times) + template tail
	# to the output file.
	# -----------------------------------------------------------------------
	csvFilename = ''
	htmFilename = ''
	tplFilename = ''
	dupColumns = 1
	dupRows = 1
	delimEnum = 0
	try:
	    # process command arguments
	    ouropts, args = getopt.getopt(sys.argv[1:], "i:o:t:d:c:r:h")
	    for o, a in ouropts:
	        if o == '-i':
	            csvFilename = a
	        elif o == '-o':
	            htmFilename = a
	        elif o == '-t':
	            tplFilename = a
	        elif o == '-d':
	            delimEnum = int(a)
	        elif o == '-c':
	            dupColumns = int(a)
	        elif o == '-r':
	            dupRows = int(a)
	        elif o == '-h':
	            Usage()
	            sys.exit(0)
	except getopt.GetoptError as e:
	    print(str(e))
	    Usage()
	    sys.exit(2)
	except ValueError:
	    # int() rejected the value given to -d, -c or -r; o and a still hold
	    # the offending option and its argument.
	    print("option %s expects an integer, got '%s'" % (o, a))
	    Usage()
	    sys.exit(2)

	# All three file names are mandatory.  A missing one is a usage error, so
	# the exit status is non-zero.
	for value, flag, what in ((csvFilename, '-i', 'input CSV'),
	                          (htmFilename, '-o', 'output HTM'),
	                          (tplFilename, '-t', 'template HTM')):
	    if not value:
	        print("please use %s for %s file" % (flag, what))
	        Usage()
	        sys.exit(2)

	# Text mode on both handles: on Python 3 csv.reader needs str lines and
	# the row writers emit str, neither of which works on a binary handle.
	with open(htmFilename, 'w') as htmFp, open(csvFilename, 'r') as csvFp:
	    processTemplateStart(htmFp, tplFilename)

	    # handle duplicate rows (TODO: should adjust date)
	    rows = 0
	    dateOffset = 0
	    while rows < dupRows:
	        lastDate = processCsv(htmFp, csvFp, delimEnum, dupColumns, rows, dupRows, dateOffset)
	        dateOffset += 10000

	        # rewind so the next pass reads the CSV from the top again
	        csvFp.seek(0)
	        rows += 1

	    processTemplateEnd(htmFp, tplFilename)