Skip to content

Instantly share code, notes, and snippets.

@LaBlazer
Last active June 3, 2019 11:14
Show Gist options
  • Save LaBlazer/5f8b0d0cfd97110820104ed252928aa3 to your computer and use it in GitHub Desktop.
Save LaBlazer/5f8b0d0cfd97110820104ed252928aa3 to your computer and use it in GitHub Desktop.
Classifies date formats and outputs them in standard format (day/month/year)
#Kopirajt LBLZR_ lmao
import os.path, codecs, sys
# Input files, read in this order by the driver loop at the bottom of the
# script; load_list() returns an empty list for any file that does not exist.
date_files = ["dates1.txt", "dates2.txt", "dates3.txt", "dates4.txt"]
# Destination for the converted dates of all input files combined; it is
# opened in "w" mode by save_list(), so earlier contents are replaced.
output_file = "dates_out.txt"
def save_list(listt, filename):
    """Write every item of *listt* to *filename*, one per line.

    The file is encoded as UTF-8, any previous content is discarded, and
    each item is rendered with ``str.format`` and terminated by CRLF.
    """
    with codecs.open(filename, "w", "utf-8") as fp:
        # Start from an empty file before writing the new entries.
        fp.truncate()
        fp.writelines("{}\r\n".format(entry) for entry in listt)
def load_list(filename):
    """Read *filename* as UTF-8 and return its lines without surrounding whitespace.

    Returns an empty list when *filename* is not an existing regular file.
    """
    if not os.path.isfile(filename):
        return []
    with codecs.open(filename, "r", "utf-8") as fp:
        # strip() removes the line terminator along with any other
        # leading/trailing whitespace.
        return [line.strip() for line in fp.readlines()]
def _split_date(date, delimiters=(".", "/")):
    """Return the integer fields of one date string ("25.06.2019" -> [25, 6, 2019])."""
    for delimiter in delimiters:
        date = date.replace(delimiter, " ")
    return [int(field) for field in date.split()]


def process_dates(dates):
    """Detect the field order of *dates* and return them as day/month/year.

    Every string in *dates* is expected to hold three integer fields
    separated by ".", "/" or whitespace, and all strings are assumed to
    share one field order. Blank strings are skipped.

    The order is inferred from the largest value seen in each position
    across the whole input: a position that ever exceeds 31 is the year,
    one that ever exceeds 12 (but never 31) is the day, and whatever is
    left is the month. Positions that cannot be told apart that way are
    assumed to already follow day, month, year order.

    Returns:
        A list of "day/month/year" strings (no zero padding), in input order.

    Raises:
        ValueError: if a field is not an integer, a non-blank line does not
            have exactly three fields, or two positions both look like the
            day (or both like the year).
    """
    print("Classifying date format")
    type_string = ["unknown", "day", "month", "year"]  # 1 = day, 2 = month, 3 = year

    parsed = []
    for date in dates:
        fields = _split_date(date)
        if not fields:
            # Blank line (e.g. trailing newline in the input file).
            continue
        if len(fields) != 3:
            raise ValueError(f"Expected three date fields in {date!r}")
        parsed.append(fields)

    if not parsed:
        print("Date format: unknown/unknown/unknown")
        return []

    # Classify on the per-position maximum so that a small day such as
    # "05" can never override evidence from another date in the same file.
    largest = [max(column) for column in zip(*parsed)]
    types = [0, 0, 0]
    for idx, value in enumerate(largest):
        if value > 31:
            types[idx] = 3  # year
        elif value > 12:
            types[idx] = 1  # day

    for role in (1, 3):
        if types.count(role) > 1:
            raise ValueError(
                f"Cannot determine date format: more than one {type_string[role]} field"
            )

    # Whatever is still unknown gets the roles that are still free, handed
    # out in standard (day, month, year) order. With day and year found,
    # this simply assigns the month by elimination.
    missing = [role for role in (1, 2, 3) if role not in types]
    for idx, role in enumerate(types):
        if role == 0:
            types[idx] = missing.pop(0)

    print(f"Date format: {type_string[types[0]]}/{type_string[types[1]]}/{type_string[types[2]]}")
    print("Changing to standard format (day/month/year)...")

    # Lookup table: for each output slot (day, month, year) the index of
    # the input field that holds it.
    type_lookup = [types.index(1), types.index(2), types.index(3)]
    print(f"Lookup table: {type_lookup}")

    day_at, month_at, year_at = type_lookup
    return [
        f"{fields[day_at]}/{fields[month_at]}/{fields[year_at]}"
        for fields in parsed
    ]
# Convert each input file independently (the date format is detected per
# file) and gather all results into a single list.
dates_out = []
for filename in date_files:
    print(f"Processing file {filename}")
    dates_out += process_dates(load_list(filename))

# Write the combined result to the output file.
print(f"Saving dates to {output_file}")
save_list(dates_out, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment