Skip to content

Instantly share code, notes, and snippets.

@oneshot719
Last active May 16, 2019 14:49
Show Gist options
  • Save oneshot719/91e91e4c9998126990323a5524724ca0 to your computer and use it in GitHub Desktop.
Save oneshot719/91e91e4c9998126990323a5524724ca0 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import csv
import json
import re
import datetime
import sys, getopt
############
# GET OPTS #
############
#Get Command Line Arguments
_USAGE = 'Usage: file.py -i <inputfile> -o <outputfile>'

# Column names, in CSV order, used as the keys of every JSON record.
_FIELDNAMES = ("timestamp", "msgid", "sender", "recipients", "topic", "mode")

# A timestamp column must be nothing but 10-99 digits (epoch milliseconds).
_EPOCH_MS_RE = re.compile(r"\d{10,99}\Z")

_UNIX_EPOCH = datetime.datetime(1970, 1, 1)


def _parse_row(rawrow):
    """Validate one CSV row and derive the values shown in the summary.

    Args:
        rawrow: list of field strings as yielded by ``csv.reader``.

    Returns:
        A ``(record, iso_date, number_of_recipients)`` tuple, where
        ``record`` maps the six field names to the row's raw strings, or
        ``None`` when the row has fewer than six columns, its timestamp is
        not a 10-99 digit number, its message id does not end in ``>``, or
        the timestamp is too large to express as a date.
    """
    if len(rawrow) < len(_FIELDNAMES):
        return None
    # zip() stops at the shorter input, so surplus columns are ignored.
    record = dict(zip(_FIELDNAMES, rawrow))
    if not _EPOCH_MS_RE.match(record["timestamp"]):
        return None
    if not record["msgid"].endswith(">"):
        return None
    # The timestamp is in milliseconds; floor to whole seconds. Adding a
    # timedelta to the epoch gives UTC regardless of the local time zone.
    seconds = int(record["timestamp"]) // 1000
    try:
        moment = _UNIX_EPOCH + datetime.timedelta(seconds=seconds)
    except OverflowError:
        return None
    iso_date = moment.strftime('%Y-%m-%d %H:%M:%S')
    # Recipients are '|'-separated, so there is one more name than separators.
    number_of_recipients = record["recipients"].count("|") + 1
    return record, iso_date, number_of_recipients


def main(argv):
    """Convert a CSV message log to newline-delimited JSON.

    Each usable input row is summarised on stdout and written to the output
    file as one JSON object per line, keyed by timestamp, msgid, sender,
    recipients, topic and mode. Blank rows are skipped silently; rows that
    fail validation are reported on stderr and skipped.

    Args:
        argv: command-line arguments without the program name. Recognised
            options are ``-h``, ``-i``/``--ifile`` and ``-o``/``--ofile``.

    Raises:
        SystemExit: with status 2 on an unknown option or when either file
            option is missing; with status 0 after ``-h``.
    """
    inputfile = ''
    outputfile = ''
    try:
        opts, _ = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(_USAGE)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(_USAGE)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
    # Without both paths there is nothing to do; fail with the usage text
    # instead of letting open('') raise.
    if not inputfile or not outputfile:
        print(_USAGE)
        sys.exit(2)
    print('Input file is: ', inputfile)
    print('Output file is: ', outputfile)
    print("")
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(inputfile, 'r', newline='') as csvfile, \
            open(outputfile, 'w') as jsonfile:
        rawreader = csv.reader(csvfile)
        # One pass does both jobs: a second reader over the same handle
        # would start at end-of-file and produce no JSON at all.
        for rawrow in rawreader:
            if not rawrow:
                continue
            parsed = _parse_row(rawrow)
            if parsed is None:
                print('Skipping malformed row at line '
                      + str(rawreader.line_num) + ': ' + str(rawrow),
                      file=sys.stderr)
                continue
            record, iso_date, number_of_recipients = parsed
            print("")
            print('ISO Formatted date is: ' + iso_date)
            print('MSG ID: ' + record["msgid"])
            print('SENDER: ' + record["sender"])
            print('RECIPIENTS: ' + record["recipients"])
            print('number_of_recipients: ' + str(number_of_recipients))
            print('TOPIC: ' + record["topic"])
            print('MODE: ' + record["mode"])
            print("")
            json.dump(record, jsonfile)
            jsonfile.write('\n')
# Run the converter only when this file is executed as a script, handing
# main() everything on the command line after the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment