Created
March 16, 2019 22:08
-
-
Save ChongyeWang/dbc08a6a627adf2942e0d94d688c1c26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""Extract date-like expressions from a text file.

Run as a script, this reads ``input.txt``, collects every distinct substring
that matches one of the date patterns defined below, prints the resulting
list, and writes one match per line to ``output.txt``.
"""
import re

# --- Pattern fragments -----------------------------------------------------
# All patterns are raw strings so that ``\d`` / ``\s`` reach the regex engine
# untouched instead of being treated as (invalid) string escape sequences.

# Numeric dates such as 3/16/2019 and 3/16, and bare four-digit years.
normal = r"(\d+/\d+/\d+)"
normal_month_date = r"(\d+/\d+)"
normal_year = r"[0-9]{4}"

# Named holidays. The dot in "Jr." is escaped so it only matches a literal
# period rather than any character.
holidays = r"(New Year's Day|Inauguration Day|Martin Luther King, Jr\. Day|George Washington’s Birthday|Memorial Day|Independence Day|Labor Day|Columbus Day|Veterans Day|Thanksgiving Day|Christmas Day)"

weeks = r"(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)"

# Month names or abbreviations: a capital initial, two more letters in either
# case (three for "Sept"), then any run of lowercase letters or dots.
# NOTE(review): there is no word-boundary check, so words such as "Martin" or
# "Maybe" also match; kept as in the original to preserve its output.
months = r"(J[Aa][Nn][.a-z]*|F[Ee][Bb][.a-z]*|M[Aa][Rr][.a-z]*|A[Pp][Rr][.a-z]*|M[Aa][Yy][.a-z]*|J[Uu][Nn][.a-z]*|J[Uu][Ll][.a-z]*|A[Uu][Gg][.a-z]*|S[Ee][Pp][Tt][.a-z]*|O[Cc][Tt][.a-z]*|N[Oo][Vv][.a-z]*|D[Ee][Cc][.a-z]*)"

# Weekday followed by a clock time ("Monday 5pm") or a part of the day.
# The dots in "a.m." / "p.m." are escaped so they match literal periods only.
weeks_time = weeks + r"( \d+(a\.m\.|p\.m\.|am|pm))"
weeks_periods = weeks + r"( (morning|afternoon|evening))"

# Month followed by an optional day number, optional ordinal suffix and a
# year ("March 16th, 2019"), or by a day carrying an ordinal suffix.
# ``[0-9]*`` matches the same strings as the former ``[0-9]*[0-9]*`` without
# the redundant backtracking.
months_date = months + r" [0-9]*(st|nd|rd|th)*,*\s*" + r"([0-9]{4})+"
months_date2 = months + r" [0-9]*(st|nd|rd|th)+"
weeks_date = weeks + r",*\s*" + months_date

# Phrases such as "the 4th of July" and "the first of May".
months_date_with_para1 = r"the [0-9]*(st|nd|rd|th)* of " + months
months_date_with_para2 = r"the (first|second|third) of " + months

# Compiled once, in the order in which their matches are reported.
_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        normal,
        normal_month_date,
        normal_year,
        holidays,
        weeks,
        months,
        weeks_time,
        weeks_periods,
        months_date,
        months_date2,
        weeks_date,
        months_date_with_para1,
        months_date_with_para2,
    )
)


def find_dates(text):
    """Return the distinct date-like substrings found in *text*.

    Patterns are applied in the order of ``_PATTERNS``; within one pattern,
    matches are reported left to right. A substring that was already
    reported (by this or an earlier pattern) is not reported again.

    Args:
        text: The string to scan.

    Returns:
        A list of matched substrings in first-seen order, without duplicates.
    """
    seen = set()  # O(1) membership test instead of scanning the result list
    found = []
    for pattern in _PATTERNS:
        for match in pattern.finditer(text):
            hit = match.group()
            if hit not in seen:
                seen.add(hit)
                found.append(hit)
    return found


def main(input_path="input.txt", output_path="output.txt"):
    """Read *input_path*, print the matches and write them to *output_path*.

    Both files are opened as UTF-8, matching the source encoding declared at
    the top of this file (the holiday pattern contains a non-ASCII
    apostrophe), so results do not depend on the platform default encoding.

    Args:
        input_path: Path of the text file to scan.
        output_path: Path of the file that receives one match per line.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        data = f.read()

    result = find_dates(data)
    print(result)

    with open(output_path, "w", encoding="utf-8") as output:
        output.writelines(date + "\n" for date in result)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment