Skip to content

Instantly share code, notes, and snippets.

@Ash-Crow
Last active October 26, 2020 11:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ash-Crow/08bededf6a2d87a0a06453ff7803f355 to your computer and use it in GitHub Desktop.
Save Ash-Crow/08bededf6a2d87a0a06453ff7803f355 to your computer and use it in GitHub Desktop.
Test script to compare the dateparser Python module with feedparser's ad-hoc date parsing
import calendar
import time
from datetime import timedelta
from pprint import pprint

import dateparser
import pkg_resources
from feedparser import datetimes
# Report which releases of the two libraries are being compared, then print
# a separator before the per-sample output.
dateparser_dist = pkg_resources.get_distribution('dateparser')
feedparser_dist = pkg_resources.get_distribution('feedparser')
print(f"dateparser version: {dateparser_dist.version}")
print(f"feedparser version: {feedparser_dist.version}")
print("\n=========\n")
# Date strings fed to both parsers. Order matters only for the printed
# report; each entry is parsed independently.
samples = [
    # Day/month-name forms with optional weekday, time and zone
    "Thu, 01 Jan 04 19:48:21 GMT",
    "Thu, 01 Jan 2004 19:48:21 GMT",
    "01 Jan 2004",
    "01 Jan 2004 00:00 GMT",
    # Numeric forms (these look like ISO 8601 full, truncated and ordinal
    # dates -- the truncated ones are intentionally ambiguous)
    "2003-12-31T10:14:55-08:00",
    "2003-12-31T10:14:55Z",
    "2003",
    "2003-12",
    "2003-12-31",
    "20031231",
    "-03-12",
    "-0312",
    "-03-12-31",
    "031231",
    "2003-335",
    "03335",
    # Zone abbreviation placed before the year
    "Sun Jan 4 16:29:06 PST 2004",
    # Calendar-invalid day (June has 30 days)
    "Thu, 31 Jun 2004 19:48:21 GMT",
    "Mon, 26 January 2004",
    "Mon, 26 Jan 2004 16:31:00 ET",
    # Out-of-range hour, minute and second fields
    "2003-12-31T25:14:55Z",
    "2003-12-31T10:61:55Z",
    "2003-12-31T10:14:61Z",
    # Space-separated date and time, with and without a fractional second
    "2004-07-08 23:56:58.0",
    "2004-07-08 23:56:58",
    # Localized (non-English) forms
    "2004-05-25 오 11:23:17",
    "Κυρ, 11 Ιούλ 2004 12:00:00 EST",
    "július-13T9:15-05:00",
    "Mardi, 8. Septembre 2020 - 10:04",
]
# Outcome tallies, one counter per comparison category, all starting at zero.
results = dict.fromkeys(
    (
        'same_result',
        'no_date_found',
        'different_dates',
        'dateparser_only',
        'feedparser_only',
    ),
    0,
)
# Parse every sample with both libraries, classify the outcome, update the
# matching counter in `results`, and print a one-entry verdict per sample.
for s in samples:
    # With dateparser. parse() gives None when it finds no date, so only an
    # actual datetime is converted to a UTC struct_time.
    try:
        dp_datetime = dateparser.parse(s)
        dp_parsed_date = (
            dp_datetime.utctimetuple() if dp_datetime is not None else None
        )
    except Exception:
        # Best-effort: a parser failure on odd input counts as "no date".
        # Unlike a bare `except:`, this lets Ctrl-C / SystemExit through.
        dp_parsed_date = None

    # With feedparser datetimes (None when the string is not recognised)
    fpdt_parsed_date = datetimes._parse_date(s)

    # Comparisons
    if dp_parsed_date is None and fpdt_parsed_date is None:
        print(f"NOK for {s}: Neither module could interpret this as a date")
        results['no_date_found'] += 1
    elif dp_parsed_date == fpdt_parsed_date:
        results['same_result'] += 1
        print(f"OK for {s}: {dp_parsed_date}")
    else:
        if dp_parsed_date is not None and fpdt_parsed_date is not None:
            # Both tuples are compared as UTC, so convert them with
            # calendar.timegm. time.mktime would read them as *local* time,
            # letting DST and historical local offsets skew the difference.
            offset_seconds = (
                calendar.timegm(dp_parsed_date)
                - calendar.timegm(fpdt_parsed_date)
            )
            delta = f"{timedelta(seconds=offset_seconds)} off"
            results['different_dates'] += 1
        else:
            if dp_parsed_date is not None:
                correct_module = "dateparser"
                results['dateparser_only'] += 1
            else:
                correct_module = "feedparser"
                results['feedparser_only'] += 1
            delta = f" only {correct_module} could interpret this as a date"
        print(f"NOK for {s}: {delta}.\n dateparser: {dp_parsed_date}\n feedparser: {fpdt_parsed_date}")
# Closing summary: separator, number of inputs tried, then the tallies.
total_tested = len(samples)
print("\n=========\n")
print(f"Total: {total_tested} date formats tested")
pprint(results)
@Ash-Crow
Copy link
Author

Results:

dateparser version: 0.7.6
feedparser version: 6.0.1

=========

OK for Thu, 01 Jan 04 19:48:21 GMT:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=1, tm_hour=19, tm_min=48, tm_sec=21, tm_wday=3, tm_yday=1, tm_isdst=0)
OK for Thu, 01 Jan 2004 19:48:21 GMT:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=1, tm_hour=19, tm_min=48, tm_sec=21, tm_wday=3, tm_yday=1, tm_isdst=0)
OK for 01 Jan 2004:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=1, tm_isdst=0)
OK for 01 Jan 2004 00:00 GMT:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=1, tm_isdst=0)
OK for 2003-12-31T10:14:55-08:00:     time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=18, tm_min=14, tm_sec=55, tm_wday=2, tm_yday=365, tm_isdst=0)
OK for 2003-12-31T10:14:55Z:     time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=10, tm_min=14, tm_sec=55, tm_wday=2, tm_yday=365, tm_isdst=0)
NOK for 2003: 298 days, 0:00:00 off.
    dateparser:   time.struct_time(tm_year=2003, tm_mon=10, tm_mday=26, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=6, tm_yday=299, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=1, tm_isdst=0)
NOK for 2003-12: 25 days, 0:00:00 off.
    dateparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=26, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=4, tm_yday=360, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=335, tm_isdst=0)
OK for 2003-12-31:     time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=365, tm_isdst=0)
NOK for 20031231: -363 days, 3:01:00 off.
    dateparser:   time.struct_time(tm_year=2003, tm_mon=1, tm_mday=2, tm_hour=3, tm_min=1, tm_sec=0, tm_wday=3, tm_yday=2, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=365, tm_isdst=0)
NOK for -03-12: 5946 days, 0:00:00 off.
    dateparser:   time.struct_time(tm_year=2020, tm_mon=3, tm_mday=12, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=72, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=335, tm_isdst=0)
NOK for -0312: -617661 days, 0:50:39 off.
    dateparser:   time.struct_time(tm_year=312, tm_mon=10, tm_mday=26, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=5, tm_yday=300, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=335, tm_isdst=0)
NOK for -03-12-31: 9963 days, 0:00:00 off.
    dateparser:   time.struct_time(tm_year=2031, tm_mon=3, tm_mday=12, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=71, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=335, tm_isdst=0)
NOK for 031231: 9933 days, 0:00:00 off.
    dateparser:   time.struct_time(tm_year=2031, tm_mon=3, tm_mday=12, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=71, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=2, tm_yday=365, tm_isdst=0)
NOK for 2003-335:  only feedparser could interpred this as a date.
    dateparser:   None
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=335, tm_isdst=0)
NOK for 03335: 11415 days, 0:00:00 off.
    dateparser:   time.struct_time(tm_year=2035, tm_mon=3, tm_mday=3, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=5, tm_yday=62, tm_isdst=0)
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=335, tm_isdst=0)
OK for Sun Jan 4 16:29:06 PST 2004:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=5, tm_hour=0, tm_min=29, tm_sec=6, tm_wday=0, tm_yday=5, tm_isdst=0)
NOK for Thu, 31 Jun 2004 19:48:21 GMT: Neither module could interpret this as a date
OK for Mon, 26 January 2004:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=26, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=26, tm_isdst=0)
OK for Mon, 26 Jan 2004 16:31:00 ET:     time.struct_time(tm_year=2004, tm_mon=1, tm_mday=26, tm_hour=21, tm_min=31, tm_sec=0, tm_wday=0, tm_yday=26, tm_isdst=0)
NOK for 2003-12-31T25:14:55Z:  only feedparser could interpred this as a date.
    dateparser:   None
    feedparser:   time.struct_time(tm_year=2004, tm_mon=1, tm_mday=1, tm_hour=1, tm_min=14, tm_sec=55, tm_wday=3, tm_yday=1, tm_isdst=0)
NOK for 2003-12-31T10:61:55Z:  only feedparser could interpred this as a date.
    dateparser:   None
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=11, tm_min=1, tm_sec=55, tm_wday=2, tm_yday=365, tm_isdst=0)
NOK for 2003-12-31T10:14:61Z:  only feedparser could interpred this as a date.
    dateparser:   None
    feedparser:   time.struct_time(tm_year=2003, tm_mon=12, tm_mday=31, tm_hour=10, tm_min=15, tm_sec=1, tm_wday=2, tm_yday=365, tm_isdst=0)
OK for 2004-07-08 23:56:58.0:     time.struct_time(tm_year=2004, tm_mon=7, tm_mday=8, tm_hour=23, tm_min=56, tm_sec=58, tm_wday=3, tm_yday=190, tm_isdst=0)
OK for 2004-07-08 23:56:58:     time.struct_time(tm_year=2004, tm_mon=7, tm_mday=8, tm_hour=23, tm_min=56, tm_sec=58, tm_wday=3, tm_yday=190, tm_isdst=0)
NOK for 2004-05-25 오 11:23:17:  only feedparser could interpred this as a date.
    dateparser:   None
    feedparser:   time.struct_time(tm_year=2004, tm_mon=5, tm_mday=25, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=1, tm_yday=146, tm_isdst=1)
OK for Κυρ, 11 Ιούλ 2004 12:00:00 EST:     time.struct_time(tm_year=2004, tm_mon=7, tm_mday=11, tm_hour=17, tm_min=0, tm_sec=0, tm_wday=6, tm_yday=193, tm_isdst=0)
NOK for július-13T9:15-05:00:  only dateparser could interpred this as a date.
    dateparser:   time.struct_time(tm_year=2013, tm_mon=7, tm_mday=26, tm_hour=14, tm_min=15, tm_sec=0, tm_wday=4, tm_yday=207, tm_isdst=0)
    feedparser:   None
NOK for Mardi, 8. Septembre 2020 - 10:04:  only dateparser could interpred this as a date.
    dateparser:   time.struct_time(tm_year=2020, tm_mon=9, tm_mday=8, tm_hour=10, tm_min=4, tm_sec=0, tm_wday=1, tm_yday=252, tm_isdst=0)
    feedparser:   None

=========

Total: 29 date formats tested
{'dateparser_only': 2,
 'different_dates': 8,
 'feedparser_only': 5,
 'no_date_found': 1,
 'same_result': 13}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment