Created
July 12, 2019 18:19
-
-
Save niborg/8a96e2aac71be48692e971e36a7ae27d to your computer and use it in GitHub Desktop.
Service wrapping Ruby's standard library so as to sniff out date ranges.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true

require 'date'

module Utilities
  # Class wrapping Ruby's standard library so as to sniff out date ranges.
  #
  # Given free-form text, #call returns a Range of Date objects when two dates
  # can be resolved, or a single Date when only one can.
  #
  # Two families of input are handled:
  #
  # * Delimited dates, e.g. "01/05/2019 - 01/10/2019". The component order is
  #   taken from the +:expected_delimited_date_format+ option ('us' means
  #   month/day/year, anything else means day/month/year).
  # * Spelled-out dates, e.g. "January 5 - 7, 2019" or "Jan 28 to Feb 3".
  #   Month names come from Date::MONTHNAMES / Date::ABBR_MONTHNAMES, and the
  #   current year is assumed when the text contains no four-digit number.
  #
  # NOTE(review): the class calls I18n.t and Array#to_sentence, so it expects
  # the host application to have I18n and Active Support loaded.
  class DateParser
    Error = Class.new(StandardError)
    # Raised when the text looks like a range but cannot be resolved to one.
    RangeParseError = Class.new(Error)
    # Internal signal: the text holds at most one date; #call rescues it.
    DateRangeNotPresent = Class.new(Error)

    DEFAULT_OPTIONS = {
      expected_delimited_date_format: 'us'
    }.freeze

    MONTH_NAMES = Date::MONTHNAMES.compact.map(&:downcase).freeze
    ABBR_MONTH_NAMES = Date::ABBR_MONTHNAMES.compact.map(&:downcase).freeze
    MONTH_NAMES_REGEX = Regexp.union(MONTH_NAMES)
    ABBR_MONTH_NAMES_REGEX = Regexp.union(ABBR_MONTH_NAMES)
    SYMBOL_DELIMITERS = ['-', '/', '.'].freeze

    # @param date_string [String] text to search for a date or date range
    # @param options [Hash] overrides merged over DEFAULT_OPTIONS
    def initialize(date_string, options = {})
      @date_string = date_string
      @options = DEFAULT_OPTIONS.merge(options)
    end

    # Resolves the text to a date range, falling back to a single date.
    #
    # @return [Range<Date>, Date]
    # @raise [RangeParseError] when the candidate days/months/years are ambiguous
    # @raise [ArgumentError] (Date::Error on newer Rubies) when the standard
    #   library cannot parse the text
    def call
      delimiter = delimiter_used_for_expressing_date
      if delimiter
        resolve_range_from_string_with_delimited_dates(delimiter)
      else
        resolve_range_from_string_without_delimited_dates
      end
    rescue DateRangeNotPresent
      parse_single_date
    end

    private

    attr_reader :date_string, :options

    # Fallback used when no range could be found: parse exactly one date.
    def parse_single_date
      delimiter = delimiter_used_for_expressing_date
      return Date.parse(date_string) unless delimiter

      # Hand strptime only the matched date, not the surrounding text, so that
      # input such as "Due 01/05/2019" still parses.
      parse_delimited_date(date_string[delimited_date_range_regex_with(delimiter)])
    end

    # Parses one delimited date (e.g. "01/05/2019") with Date.strptime.
    #
    # TODO: make this responsive to different formats. We can usually sniff out the year
    # and determine what format to provide based on that.
    def parse_delimited_date(date)
      # The matching regex tolerates whitespace around delimiters; strptime does not.
      compact_date = date.gsub(/\s+/, '')
      # Decide between 4- and 2-digit years per date, so a range mixing both
      # styles ("01/05/19 - 01/10/2019") parses each side correctly.
      year_token = compact_date =~ /\d{4}/ ? '%Y' : '%y'
      leading_tokens =
        if options[:expected_delimited_date_format] == 'us'
          %w[%m %d]
        else
          %w[%d %m]
        end
      format = (leading_tokens + [year_token]).join(delimiter_used_for_expressing_date)
      Date.strptime(compact_date, format)
    end

    # True when the text contains any configured range word or a hyphen.
    def range_delimiter_present?
      # `defined?` rather than `||=` so that a false result is cached too.
      return @range_delimiter_present if defined?(@range_delimiter_present)

      @range_delimiter_present = (word_range_delimiters + ['-']).any? do |delimiter|
        date_string.include?(delimiter)
      end
    end

    # Regex matching a single date whose three numeric parts are separated by
    # +delimiter+. Outer parts allow 1-4 digits (the year may come first or
    # last); the middle part is always a day or a month, hence 1-2 digits.
    def delimited_date_range_regex_with(delimiter)
      regex_safe_delimiter = Regexp.quote(delimiter)
      /\d{1,4}\s*#{regex_safe_delimiter}\s*\d{1,2}\s*#{regex_safe_delimiter}\s*\d{1,4}/
    end

    # First symbol delimiter for which the text contains a delimited date, or nil.
    def delimiter_used_for_expressing_date
      # `defined?` rather than `||=` so that a nil result is cached too.
      if defined?(@delimiter_used_for_expressing_date)
        return @delimiter_used_for_expressing_date
      end

      @delimiter_used_for_expressing_date = symbol_delimiters.find do |delimiter|
        date_string =~ delimited_date_range_regex_with(delimiter)
      end
    end

    # Builds a range from exactly two delimited dates found in the text.
    #
    # @raise [DateRangeNotPresent] when only one delimited date is found
    # @raise [RangeParseError] when any other number of dates is found
    def resolve_range_from_string_with_delimited_dates(date_delimiter)
      candidates = date_string.scan(delimited_date_range_regex_with(date_delimiter))

      # Delimiter is used to only express a date.
      raise DateRangeNotPresent if candidates.size == 1

      unless candidates.size == 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" was parsed to have candidate dates of
          #{candidates.to_sentence}
        MESSAGE
      end

      # Two candidate dates: we assume they make up the date range.
      parse_delimited_date(candidates[0])..parse_delimited_date(candidates[1])
    end

    # Builds a range from spelled-out months plus bare day and year numbers.
    # Missing second month/year default to the first one.
    #
    # @raise [DateRangeNotPresent] when there is no range delimiter or fewer
    #   than two candidate days
    # @raise [RangeParseError] when the candidates are ambiguous or the
    #   resolved end date is not after the start date
    def resolve_range_from_string_without_delimited_dates
      raise DateRangeNotPresent unless range_delimiter_present?

      if days.size > 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" has too many candidate days: #{days.to_sentence}
        MESSAGE
      end
      if months.empty? || months.size > 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" could not be parsed to resolve months:
          #{months.empty? ? 'none' : months.to_sentence}
        MESSAGE
      end
      if years.size > 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" could not be parsed to resolve years:
          #{years.to_sentence}
        MESSAGE
      end

      # A range needs two days. With zero or one there is nothing to span, so
      # let #call fall back to single-date parsing instead of handing nil to
      # Date.new.
      raise DateRangeNotPresent if days.size < 2

      date_1 = Date.new(years[0], months[0], days[0])
      date_2 = Date.new(years[1] || years[0], months[1] || months[0], days[1])
      if date_2 <= date_1
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" has invalid range format, resolved #{date_1} to #{date_2}
        MESSAGE
      end

      date_1..date_2
    end

    # Symbols that may separate the parts of a single delimited date.
    def symbol_delimiters
      SYMBOL_DELIMITERS
    end

    # Words that separate the two ends of a range, looked up through I18n.
    def word_range_delimiters
      @word_range_delimiters ||= I18n.t('lib.utilities.date_parsing.range_delimiters')
    end

    # Candidate day numbers: every run of one or two digits in the text.
    def days
      @days ||= date_string.scan(/\d+/).reject { |digits| digits.size > 2 }.map(&:to_i)
    end

    # Month numbers found in the text; abbreviations are only consulted when
    # no full month name is present.
    def months
      @months ||= begin
        by_full_name = months_by_full_name
        by_full_name.empty? ? months_by_abbr_name : by_full_name
      end
    end

    def months_by_full_name
      date_string.downcase.scan(MONTH_NAMES_REGEX).map(&method(:month_integer_for)).compact
    end

    def months_by_abbr_name
      date_string.downcase.scan(ABBR_MONTH_NAMES_REGEX).map(&method(:month_integer_for)).compact
    end

    # 1-based month number for a full or abbreviated month name, or nil.
    def month_integer_for(month)
      name = month.downcase
      index = MONTH_NAMES.index(name) || ABBR_MONTH_NAMES.index(name)
      index && index + 1
    end

    # Candidate years: every four-digit run, defaulting to the current year.
    def years
      @years ||= begin
        found = date_string.scan(/\d{4}/).map(&:to_i)
        found.empty? ? [Date.today.year] : found
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment