Skip to content

Instantly share code, notes, and snippets.

@solalatus
Created July 16, 2016 12:33
Show Gist options
  • Save solalatus/0c353156ac7678d32118df290ae40ede to your computer and use it in GitHub Desktop.
Save solalatus/0c353156ac7678d32118df290ae40ede to your computer and use it in GitHub Desktop.
Dateparser-based detector for all dates in a string - untested
import dateparser
def detect_date(line="", parser=None):
    """Detect multiple, multi-language date and datetime entries in a string.

    The line is split into words and scanned from left to right. At every
    start position the longest run of words accepted by the parser is taken
    as one date; scanning then resumes directly after that run, so the parts
    of a single datetime are never reported a second time.

    @param line: String to detect dates in, ideally a single line.
    @type line: String
    @param parser: Optional callable that takes a string and returns the
        parsed value, or None when the string is not a date. Defaults to
        dateparser.parse.
    @type parser: Callable or None
    @return: Returns the list of found DateTime objects or None
    @rtype: List or None
    """
    # For a start, split the string, remove , and ; to get rid of edge cases
    # and be able to manipulate the data word by word.
    words = line.replace(",", "").replace(";", "").split()
    if not words:
        # Nothing to scan; this is a "detector", so give None if nothing is
        # found, so as to fit in logical operations.
        return None

    if parser is None:
        parser = dateparser.parse

    found_dates = []
    start = 0
    while start < len(words):
        # Try the longest candidate first and shrink it one word at a time,
        # never going below a single word (empty candidates are pointless).
        for end in range(len(words), start, -1):
            candidate = " ".join(words[start:end])
            try:
                parsed = parser(candidate)
            except Exception:
                # Best effort: a parser failure on a candidate only means
                # "this is not a date"; keep shrinking the candidate.
                parsed = None
            if parsed is not None:
                found_dates.append(parsed)
                # Skip everything "covered" by what we found. `end` is
                # exclusive, so the word following the date is kept.
                start = end
                break
        else:
            # No candidate starting at this word parsed; move one word on.
            start += 1

    # Keep the None-when-empty contract for callers using the result as a flag.
    return found_dates or None
if __name__ == '__main__':
    # English dummy example: two absolute timestamps and one relative date
    # embedded in filler words.
    line="aaa Sat, 16 Jul 2016 06:09:12 +0000 and Sat, 17 Aug 2015 09:11:15 +0100 aaa and 2 days ago aaaaa"
    results = detect_date(line)
    # detect_date returns None (not an empty list) when nothing is found,
    # so fall back to an empty sequence before iterating.
    for r in results or []:
        print(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment