Skip to content

Instantly share code, notes, and snippets.

@marcgascon
Last active August 11, 2019 20:21
Show Gist options
  • Save marcgascon/c411a63b79f4353293d67284630a7e80 to your computer and use it in GitHub Desktop.
Save marcgascon/c411a63b79f4353293d67284630a7e80 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 expandtab
import sys, os, re
from time import strptime
# Module metadata: script name and version string.
__title__ = "readfile"
__version__ = "0.1"
def read_file_lines(filename):
    """Return the lines of ``filename`` as a list, without line terminators.

    When ``filename`` is not an existing regular file, an error message is
    printed and the process exits with status 1.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file content split with ``str.splitlines()``.

    Raises:
        SystemExit: With code 1 when ``filename`` is not a regular file.
    """
    if not os.path.isfile(filename):
        print('Input file does not exist. Try it again')
        # sys.exit rather than the bare ``exit`` helper: ``exit`` is injected
        # by the ``site`` module for interactive sessions, is missing under
        # ``python -S``, and closes sys.stdin before raising SystemExit.
        sys.exit(1)
    with open(filename) as f:
        return f.read().splitlines()
def is_positive_integer(string):
    """Tell whether ``string`` parses as an integer strictly greater than zero.

    Args:
        string: Text to convert with ``int()``.

    Returns:
        True when the parsed value is greater than 0. False when it is zero
        or negative, or when ``int()`` raises ValueError (in which case a
        notice is also printed).
    """
    try:
        value = int(string)
    except ValueError:
        print("ID is not an integer. Line discarded.")
        return False
    return value > 0
def process_strings(strings):
    """Split ``strings`` into space separated tokens, keeping quoted runs whole.

    A token that starts with ``"`` (optionally preceded by a backslash) extends
    up to the next ``"`` (optionally preceded by a backslash) that is followed
    by at least one space, and is returned with its quotes. If no such closing
    quote exists, everything that is left becomes a single final token. Any
    other token ends at the next space character.

    Args:
        strings: The text to tokenize.

    Returns:
        The tokens in order of appearance. Leading, trailing, and repeated
        whitespace never produce empty tokens.
    """
    parsed_strings = []
    remaining = strings.lstrip()
    # Stop as soon as only whitespace is left, so that trailing blanks do not
    # add an empty token that callers would count as a real string.
    while remaining:
        if re.match(r'^\\?".*$', remaining):
            split = re.search(r'(^\\?".*?\\?") +(.*)?', remaining)
            if split is None:
                # No closing quote followed by a space: keep the rest whole.
                parsed_strings.append(remaining)
                break
            parsed_strings.append(split.group(1))
            remaining = split.group(2) or ""
        else:
            token, _, remaining = remaining.partition(' ')
            parsed_strings.append(token)
        remaining = remaining.lstrip()
    return parsed_strings
def user_input_and_result(dictionary, invalid_lines):
    """Ask for a list of ids and print the strings stored for each one.

    One line is read from standard input. It must consist solely of digit
    groups separated by single commas (for example ``1,22,3``); anything else
    is rejected with a message. For every requested id, each string stored
    under it is printed as ``<id> <string>``, or a notice is printed when the
    id is unknown. The count of invalid lines is always printed last.

    Args:
        dictionary: Mapping from id (as a string) to a list of strings.
        invalid_lines: Number of discarded input lines to report.
    """
    print('Please provide a comma separated list of ids to show string2')
    ids = input()
    # fullmatch, not match: the entire answer has to be a valid list, so that
    # inputs such as "1,2abc" or "1,,2" are rejected rather than half-accepted.
    if re.fullmatch('[0-9]+(,[0-9]+)*', ids):
        for num in ids.split(','):
            if num in dictionary:
                for string in dictionary[num]:
                    print(num + " " + string)
            else:
                print('Id: ' + num + ', does not exist in the input file')
    else:
        print('Input you provided is not a comma separated list of positive integers.')
    print("Invalid lines: " + str(invalid_lines))
def main():
    """Load the file named on the command line and answer an id query.

    Every line is expected to hold, separated by spaces: a positive integer
    id, a date-time in ``%Y-%m-%d-%H:%M:%S`` format, and at least three
    strings. Lines that do not meet this are counted as invalid and skipped.
    For each valid line the second string is stored under the line's id, and
    the resulting mapping is handed to ``user_input_and_result`` together
    with the invalid line count.

    Raises:
        SystemExit: With code 1 when no input file argument was given.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        print('Input file to read from, not provided. Please try it again.')
        # sys.exit instead of the interactive-only ``exit`` helper.
        sys.exit(1)

    file_content = read_file_lines(filename)
    invalid_lines = 0
    db = {}
    for line in file_content:
        line_array = line.split(' ', 1)

        # Get the id of the line and validate that it is a positive integer.
        line_id = line_array[0]
        if not is_positive_integer(line_id):
            invalid_lines += 1
            continue

        # A line holding nothing but an id has no second field to index;
        # count it as invalid instead of failing with IndexError.
        if len(line_array) < 2:
            invalid_lines += 1
            continue

        line_array = line_array[1].lstrip().split(' ', 1)
        # Line must have at least: id, date-time and strings.
        if len(line_array) < 2:
            invalid_lines += 1
            continue

        # Validate that the second field is in the date-time format.
        try:
            strptime(line_array[0], '%Y-%m-%d-%H:%M:%S')
        except ValueError:
            invalid_lines += 1
            continue

        # With the id and the date-time checked, evaluate the strings: the
        # row needs at least 3 of them after the id and the date-time.
        strings = process_strings(line_array[1])
        if len(strings) < 3:
            invalid_lines += 1
            continue

        # Keep the second string, grouped by id.
        db.setdefault(line_id, []).append(strings[1])

    user_input_and_result(db, invalid_lines)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
@marcgascon
Copy link
Author

Tested with python 3.7.3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment