Skip to content

Instantly share code, notes, and snippets.

@godmar
Last active February 1, 2016 18:28
Show Gist options
  • Save godmar/8e32645ddc7086494a04 to your computer and use it in GitHub Desktop.
Save godmar/8e32645ddc7086494a04 to your computer and use it in GitHub Desktop.
patches to pycounter
diff --git a/pycounter/helpers.py b/pycounter/helpers.py
index 5001da9..90f274b 100644
--- a/pycounter/helpers.py
+++ b/pycounter/helpers.py
@@ -25,7 +25,17 @@ def convert_date_run(datestring):
:param datestring: the string to convert to a date.
"""
- return datetime.datetime.strptime(datestring, "%Y-%m-%d").date()
+ try:
+ return datetime.datetime.strptime(datestring, "%Y-%m-%d").date()
+ except ValueError:
+ try:
+ return datetime.datetime.strptime(datestring, "%m/%d/%Y").date()
+ except ValueError:
+ # ISO 8601 without timezone
+ return datetime.datetime.strptime(datestring, "%Y-%m-%dT%H:%M:%S").date()
+
+ # NB: dateutil.parser.parse is deliberately not used here, because it
+ # would misparse 'Jan-11' as 2016/1/11 instead of 2011/1/1.
def convert_date_column(datestring):
@@ -36,7 +46,13 @@ def convert_date_column(datestring):
:param datestring: the string to convert to a date.
"""
- return datetime.datetime.strptime(datestring.strip(), "%b-%Y").date()
+ try: # Jan-2011
+ return datetime.datetime.strptime(datestring.strip(), "%b-%Y").date()
+ except ValueError:
+ try: # Jan-11
+ return datetime.datetime.strptime(datestring.strip(), "%b-%y").date()
+ except ValueError: # 11-Jan
+ return datetime.datetime.strptime(datestring.strip(), "%y-%b").date()
def last_day(orig_date):
diff --git a/pycounter/report.py b/pycounter/report.py
index 5fda2df..455e6c9 100644
--- a/pycounter/report.py
+++ b/pycounter/report.py
@@ -545,7 +545,7 @@ def parse_generic(report_reader):
six.next(report_reader)
for line in report_reader:
- if not line:
+ if not line or all(l == "" for l in line):
continue
report.pubs.append(_parse_line(line, report, last_col))
@@ -586,8 +586,9 @@ def _parse_line(line, report, last_col):
pass
else:
if report.report_type.startswith('JR1'):
- html_total = int(line[-2])
- pdf_total = int(line[-1])
+ # ignore thousand separators (e.g. 1,000) occurring in some reports
+ html_total = int(line[-2].replace(",", ""))
+ pdf_total = int(line[-1].replace(",",""))
issn = line[3].strip()
eissn = line[4].strip()
line = line[0:last_col]
@@ -586,8 +586,8 @@ def _parse_line(line, report, last_col):
pass
else:
if report.report_type.startswith('JR1'):
- html_total = int(line[-2])
- pdf_total = int(line[-1])
+ html_total = format_stat(line[-2])
+ pdf_total = format_stat(line[-1])
issn = line[3].strip()
eissn = line[4].strip()
line = line[0:last_col]
@@ -661,8 +661,15 @@ def _year_from_header(header, report):
first_date_col = 6
elif report.report_type == 'DB2' and report.report_version == 4:
first_date_col = 5
- year = int(header[first_date_col].split('-')[1])
- if year < 100:
- year += 2000
+ for yearpos in [1, 0]:
+ try:
+ year = int(header[first_date_col].split('-')[yearpos])
+ if year < 100:
+ year += 2000
+
+ return year
+ except ValueError:
+ pass
+
+ raise Exception("Could not get year from header field: %s" % header[first_date_col])
- return year
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment