Skip to content

Instantly share code, notes, and snippets.

@niborg
Created July 12, 2019 18:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niborg/8a96e2aac71be48692e971e36a7ae27d to your computer and use it in GitHub Desktop.
Save niborg/8a96e2aac71be48692e971e36a7ae27d to your computer and use it in GitHub Desktop.
Service wrapping Ruby's standard library so as to sniff out date ranges.
# frozen_string_literal: true
module Utilities
  # Parses a free-form string into a single Date or a Range of Dates.
  #
  # Two families of input are recognised:
  #
  # * numeric dates whose parts are separated by "-", "/" or "."
  #   (e.g. "7/4/2019"); one such date yields a Date, two yield a Range;
  # * dates that spell the month by name (e.g. "Jan 1 to May 5, 2019"), where
  #   a word range delimiter (looked up through I18n) or "-" marks a range.
  #
  # Input that does not look like a range falls back to a single-date parse.
  #
  #   Utilities::DateParser.new('1/2/2019 - 1/5/2019').call
  #   # => Date(2019-01-02)..Date(2019-01-05)
  #
  # NOTE(review): the RangeParseError messages that list candidates call
  # Array#to_sentence, which is not core Ruby (presumably ActiveSupport) --
  # confirm it is loaded wherever this class is used.
  class DateParser
    Error = Class.new(StandardError)
    # The string looks like a range but its parts cannot be resolved.
    RangeParseError = Class.new(Error)
    # Internal signal: the string holds a single date rather than a range.
    DateRangeNotPresent = Class.new(Error)

    DEFAULT_OPTIONS = {
      expected_delimited_date_format: 'us'
    }.freeze

    MONTH_NAMES = Date::MONTHNAMES.compact.map(&:downcase).freeze
    ABBR_MONTH_NAMES = Date::ABBR_MONTHNAMES.compact.map(&:downcase).freeze
    MONTH_NAMES_REGEX = Regexp.union(MONTH_NAMES)
    ABBR_MONTH_NAMES_REGEX = Regexp.union(ABBR_MONTH_NAMES)
    # Full names are listed first so "january" is consumed whole instead of
    # stopping at "jan"; abbreviations still match when mixed with full names
    # (notably "may", which is both a full and an abbreviated name).
    ANY_MONTH_NAME_REGEX = Regexp.union(MONTH_NAMES + ABBR_MONTH_NAMES)

    # @param date_string [String] text expected to contain a date or date range
    # @param options [Hash] overrides for DEFAULT_OPTIONS;
    #   :expected_delimited_date_format is 'us' for month-first numeric dates,
    #   any other value means day-first
    def initialize(date_string, options = {})
      @date_string = date_string
      @options = DEFAULT_OPTIONS.merge(options)
    end

    # Resolves the string given to the constructor.
    #
    # @return [Date, Range] a Date when only one date is present, otherwise a
    #   Range running from the first date to the second
    # @raise [RangeParseError] when the string looks like a range but its
    #   days, months or years cannot be resolved
    # Errors raised by Date.parse / Date.strptime are not rescued here.
    def call
      delimiter = delimiter_used_for_expressing_date
      if delimiter
        resolve_range_from_string_with_delimited_dates(delimiter)
      else
        resolve_range_from_string_without_delimited_dates
      end
    rescue DateRangeNotPresent
      parse_single_date
    end

    private

    attr_reader :date_string, :options

    # Fallback used once the string is known not to contain a range.
    def parse_single_date
      delimiter = delimiter_used_for_expressing_date
      return Date.parse(date_string) unless delimiter

      # Parse only the matched candidate so surrounding text (e.g. a label in
      # front of the date) does not break strptime.
      parse_delimited_date(date_string[delimited_date_range_regex_with(delimiter)])
    end

    # Parses one numeric, delimiter-separated date such as "07/04/2019".
    #
    # @param date [String] a single candidate date; whitespace is ignored
    # @return [Date]
    def parse_delimited_date(date)
      # TODO: make this responsive to different formats. We can usually sniff out the year
      # and determine what format to provide based on that.
      compact_date = date.gsub(/\s+/, '') # white space confuses Date.strptime
      # The year width is sniffed from this candidate, not from the whole
      # input, so both halves of a range may use different year widths.
      four_digit_year = compact_date =~ /\d{4}/
      year_token = four_digit_year ? 'Y' : 'y'
      delimiter = delimiter_used_for_expressing_date
      leading_tokens =
        if options[:expected_delimited_date_format] == 'us'
          %w[%m %d]
        else
          %w[%d %m]
        end
      format = (leading_tokens + ["%#{year_token}"]).join(delimiter)
      Date.strptime(compact_date, format)
    end

    # @return [Boolean] whether the string contains a word range delimiter or "-"
    def range_delimiter_present?
      # `defined?` rather than `||=` so a false result is memoized too.
      return @range_delimiter_present if defined?(@range_delimiter_present)

      delimiters = word_range_delimiters + ['-']
      @range_delimiter_present = delimiters.any? do |delimiter|
        date_string.include?(delimiter)
      end
    end

    # Builds the pattern for a numeric date whose three parts are separated by
    # +delimiter+. The middle part accepts one or two digits so non-padded
    # dates such as "7/4/2019" are recognised.
    def delimited_date_range_regex_with(delimiter)
      regex_safe_delimiter = Regexp.quote(delimiter)
      /\d{1,4}\s*#{regex_safe_delimiter}\s*\d{1,2}\s*#{regex_safe_delimiter}\s*\d{1,4}/
    end

    # @return [String, nil] the first symbol delimiter for which the string
    #   contains a numeric date, or nil when there is none
    def delimiter_used_for_expressing_date
      # `defined?` rather than `||=` so a nil result is memoized too.
      if defined?(@delimiter_used_for_expressing_date)
        return @delimiter_used_for_expressing_date
      end

      @delimiter_used_for_expressing_date = symbol_delimiters.find do |delimiter|
        date_string =~ delimited_date_range_regex_with(delimiter)
      end
    end

    # Resolves a range out of numeric dates separated by +date_delimiter+.
    #
    # @raise [DateRangeNotPresent] when exactly one candidate date is found
    # @raise [RangeParseError] when the number of candidates is neither 1 nor 2
    def resolve_range_from_string_with_delimited_dates(date_delimiter)
      candidates = date_string.scan(delimited_date_range_regex_with(date_delimiter))

      case candidates.size
      when 1
        # Delimiter is used to only express a date.
        raise DateRangeNotPresent
      when 2
        # Two candidate dates: we assume they make up the date range.
        first_date = parse_delimited_date(candidates[0])
        second_date = parse_delimited_date(candidates[1])
        first_date..second_date
      else
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" was parsed to have candidate dates of
          #{candidates.to_sentence}
        MESSAGE
      end
    end

    # Resolves a range out of dates that spell their month by name.
    #
    # @raise [DateRangeNotPresent] when no range delimiter is present or only
    #   one candidate day is found
    # @raise [RangeParseError] when days, months or years are missing or too
    #   numerous, or when the second date is not after the first
    def resolve_range_from_string_without_delimited_dates
      raise DateRangeNotPresent unless range_delimiter_present?

      if days.size > 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" has too many candidate days: #{days.to_sentence}
        MESSAGE
      end
      if months.empty? || months.size > 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" could not be parsed to resolve months:
          #{months.empty? ? 'none' : months.to_sentence}
        MESSAGE
      end
      if years.size > 2
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" could not be parsed to resolve years:
          #{years.to_sentence}
        MESSAGE
      end
      raise DateRangeNotPresent if days.one?
      if days.empty?
        # Without this guard Date.new would be handed a nil day.
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" has no candidate days
        MESSAGE
      end

      # A missing second year/month/day is inherited from the first date.
      first_date = Date.new(years[0], months[0], days[0])
      second_date = Date.new(years[1] || years[0], months[1] || months[0], days[1] || days[0])
      if second_date <= first_date
        raise RangeParseError, <<~MESSAGE
          String "#{date_string}" has invalid range format, resolved #{first_date} to #{second_date}
        MESSAGE
      end
      first_date..second_date
    end

    # Symbols that may separate the parts of a numeric date.
    def symbol_delimiters
      ['-', '/', '.'].freeze
    end

    # Range delimiters read from the I18n key below; the result is combined
    # with an Array, so the translation is expected to be an Array of Strings.
    def word_range_delimiters
      @word_range_delimiters ||= I18n.t('lib.utilities.date_parsing.range_delimiters')
    end

    # Candidate days: every run of digits that is at most two characters long.
    def days
      @days ||= date_string.scan(/\d+/).reject { |digits| digits.size > 2 }.map(&:to_i)
    end

    # Candidate months (1-12) in order of appearance, matched by full or
    # abbreviated English month name, case-insensitively.
    def months
      @months ||= date_string.downcase
                             .scan(ANY_MONTH_NAME_REGEX)
                             .map { |name| month_integer_for(name) }
                             .compact
    end

    # @return [Integer, nil] 1-based month number for a full or abbreviated
    #   month name, or nil when the name is unknown
    def month_integer_for(month)
      name = month.downcase
      index = MONTH_NAMES.index(name) || ABBR_MONTH_NAMES.index(name)
      index && index + 1
    end

    # Candidate years: every four-digit run, or the current year when the
    # string contains none.
    def years
      @years ||= begin
        found = date_string.scan(/\d{4}/).map(&:to_i)
        found.empty? ? [Date.today.year] : found
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment