Last active
August 11, 2019 20:21
-
-
Save marcgascon/c411a63b79f4353293d67284630a7e80 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# vim: tabstop=4 shiftwidth=4 expandtab | |
import sys, os, re | |
from time import strptime | |
__title__ = "readfile" | |
__version__ = "0.1" | |
def read_file_lines(filename):
    """Return the lines of *filename* without their line terminators.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one string per line, as produced by ``str.splitlines``.

    Raises:
        SystemExit: With status 1, after printing a message, when
            *filename* is not an existing regular file.
    """
    if not os.path.isfile(filename):
        print('Input file does not exist. Try it again')
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is only meant for interactive sessions.
        sys.exit(1)
    with open(filename) as f:
        return f.read().splitlines()
def is_positive_integer(string):
    """Tell whether *string* parses as an integer strictly greater than zero.

    Text that ``int()`` rejects is reported on stdout and treated as not
    positive.
    """
    try:
        value = int(string)
    except ValueError:
        print("ID is not an integer. Line discarded.")
        return False
    return value > 0
def process_strings(strings):
    """Split *strings* into space-separated tokens, keeping quoted runs whole.

    A token that starts with ``"`` (or ``\\"``) extends up to the first
    closing quote that is followed by at least one space; the quotes stay
    part of the token.  When no such closing quote exists, everything that
    is left is returned as a single final token.  Any other token ends at
    the next space.

    Args:
        strings: The text to tokenize.

    Returns:
        The list of tokens in order of appearance.  Whitespace-only input
        and trailing whitespace never produce empty tokens.
    """
    parsed_strings = []
    # Strip before testing for emptiness so that leftover whitespace ends
    # the loop instead of being emitted as an empty token.
    remaining = strings.lstrip()
    while remaining:
        if re.match(r'^\\?".*$', remaining):
            split = re.search(r'(^\\?".*?\\?") +(.*)?', remaining)
            if split is None:
                # Unterminated (or final) quoted run: keep the rest as is.
                parsed_strings.append(remaining)
                break
            parsed_strings.append(split.group(1))
            rest = split.group(2) or ""
        else:
            token, _, rest = remaining.partition(' ')
            parsed_strings.append(token)
        remaining = rest.lstrip()
    return parsed_strings
def user_input_and_result(dictionary, invalid_lines):
    """Prompt for a list of ids and print the strings stored for each one.

    Args:
        dictionary: Maps an id (as a string of digits) to the list of
            strings collected for that id.
        invalid_lines: Number of discarded input lines; echoed at the end.
    """
    print('Please provide a comma separated list of ids to show string2')
    ids = input().strip()
    # fullmatch, not match: the whole answer has to be digits separated by
    # commas, otherwise something like "1,abc" would slip through.
    if re.fullmatch(r'[0-9]+(,[0-9]+)*', ids):
        for num in ids.split(','):
            if num in dictionary:
                for string in dictionary[num]:
                    print(num + " " + string)
            else:
                print('Id: ' + num + ', does not exist in the input file')
    else:
        print('Input you provided is not a comma separated list of positive integers.')
    # NOTE(review): printed on every run here; the original indentation of
    # this statement could not be recovered -- confirm it was not meant to
    # belong to the else branch only.
    print("Invalid lines: " + str(invalid_lines))
def main():
    """Index the input file by id and answer one interactive id query.

    The file named by the first command-line argument is read line by line.
    A line is kept only if it consists of a positive integer id, a timestamp
    in ``%Y-%m-%d-%H:%M:%S`` format and at least three strings; every other
    line is counted as invalid.  For each kept line the second string is
    stored under its id, and the resulting table is handed to
    ``user_input_and_result`` together with the invalid-line count.

    Raises:
        SystemExit: With status 1 when no input file argument was given.
    """
    try:
        filename = sys.argv[1]
    except IndexError:
        print('Input file to read from, not provided. Please try it again.')
        sys.exit(1)

    file_content = read_file_lines(filename)
    invalid_lines = 0
    db = {}

    for line in file_content:
        # Get the Id of the line and validate it is a positive integer.
        # partition() leaves `rest` empty for a line that holds nothing but
        # an id, so such a line is discarded below instead of crashing.
        line_id, _, rest = line.partition(' ')
        if not is_positive_integer(line_id):
            invalid_lines += 1
            continue

        # Line must have at least: ID, Datetime and Strings
        timestamp, separator, payload = rest.lstrip().partition(' ')
        if not separator:
            invalid_lines += 1
            continue

        # Validate that the second field is in the date-time format
        try:
            strptime(timestamp, '%Y-%m-%d-%H:%M:%S')
        except ValueError:
            invalid_lines += 1
            continue

        # Now that the id and the date-time are checked, evaluate the
        # strings: the row needs at least 3 of them after the date-time.
        strings = process_strings(payload)
        if len(strings) < 3:
            invalid_lines += 1
            continue

        db.setdefault(line_id, []).append(strings[1])

    user_input_and_result(db, invalid_lines)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tested with Python 3.7.3.