Skip to content

Instantly share code, notes, and snippets.

@kardeiz
Created August 20, 2012 19:06
Show Gist options
  • Save kardeiz/3406777 to your computer and use it in GitHub Desktop.
Save kardeiz/3406777 to your computer and use it in GitHub Desktop.
Super simple date parser
# Toggle the 'american_date' / 'date' requires below to switch between American (MM/DD/YYYY) parsing and Ruby's default Date parsing
require 'american_date'
require 'active_support/core_ext'
# require 'date'
# Best-effort normaliser for free-text date strings.
#
# The single intended entry point is SimpleDates.parse. It runs the input
# through a chain of small recognisers (bare year, circa markers, season
# words, decades / uncertainty markers, ranges, stray punctuation) and finally
# hands whatever is left to Date._parse. Several recognisers strip the part
# they understand and call parse again on the remainder.
class SimpleDates
  # Normalises a free-text date.
  #
  # @param mystring [#to_s] raw date text; nil is treated as an empty string
  # @return [String] the normalised date, or "n.d." when nothing usable is found
  def self.parse(mystring)
    mystring = mystring.to_s.downcase.strip
    # Date._parse likes '/' better than '-', I guess?
    mystring_rp = mystring.tr("-", "/")
    result = single_year(mystring) ||
             circa_dates(mystring) ||
             seasons(mystring) ||
             decades_et_al(mystring) ||
             date_ranges(mystring) ||
             random_punc(mystring) ||
             rdate_parse(mystring_rp)
    # Plain-Ruby emptiness check, so this class does not need ActiveSupport's #blank?.
    result.to_s.strip.empty? ? "n.d." : result
  end

  # NOTE(review): a bare `private` in the class body does not apply to methods
  # defined inside `class << self`, so these helpers are publicly callable.
  # They are deliberately left public here to stay backward-compatible.
  class << self
    # Characters that separate the components of a date.
    SEPARATORS = /[\/\-,\.\s]+/

    # A circa marker ("c", "c.", "ca", "ca.", "c.a.", "circa") that is not glued
    # to a preceding letter and is not followed by a letter b-z. The boundaries
    # are look-arounds so that neighbouring characters (e.g. the "1" in
    # "c.1900") are never removed together with the marker.
    CIRCA_MARKER = /(?<![a-zA-Z])(?:[cC]irca|[cC]\.?[aA]?\.?)(?=[^b-zB-Z]|$)/

    # Season / qualifier words that are dropped before re-parsing. Order
    # matters: the long forms must be removed before their abbreviations.
    SEASON_WORDS = [
      /[Ss]pring/, /[Ss]pr/, /[Ss]ummer/, /[Ss]um/, /[Ff]all/, /[Aa]utumn/,
      /[Ww]inter/, /[Ww]in/, /[Ee]arly/, /[Ll]ate/
    ].freeze

    # Runs of separator punctuation at the very start / very end of the text.
    LEADING_PUNC = /\A\s*[\/\-,\.]+/
    TRAILING_PUNC = /[\/\-,\.]+\s*\z/

    # @return [String, nil] the input itself when it is exactly four digits
    def single_year(mystring)
      mystring[/^\d{4}$/]
    end

    # Strips circa markers, re-parses the remainder and prefixes it with "c. ".
    #
    # @return [String, nil] nil when the text carries no circa marker
    def circa_dates(mystring)
      return nil unless mystring =~ CIRCA_MARKER

      "c. " + parse(mystring.gsub(CIRCA_MARKER, ""))
    end

    # Drops season / "early" / "late" words and re-parses what is left.
    # The argument itself is never modified.
    #
    # @return [String, nil] nil when no season word is present
    def seasons(mystring)
      return nil unless SEASON_WORDS.any? { |word| mystring =~ word }

      parse(SEASON_WORDS.inject(mystring) { |text, word| text.gsub(word, "") })
    end

    # Handles decades and uncertainty markers.
    #
    # @return [String, nil] nil when none of the patterns apply
    def decades_et_al(mystring)
      # whole decades: "1920's" / "1920s" -> "1920s"
      return mystring.delete("'") if mystring =~ /\d'?s$/

      # partial decades: "192-?" is looked for first, then "192?"; both give "1920s?"
      partial = mystring.match(/([^\d]|^)(\d{3})\-\?/) ||
                mystring.match(/([^\d]|^)(\d{3})\?/)
      return "#{partial[2]}0s?" if partial

      # uncertain: remove the "?" / "(?)" marker, re-parse, then re-append "?"
      marker = mystring.match(/\s?\(?\s?\?\s?\)?\s?/)
      return nil if marker.nil?

      parse(mystring.gsub(marker.to_s, "")) + "?"
    end

    # Passes the text through unchanged when it looks like it holds several
    # dates: more than two numbers above 12, or more than one number above 31.
    #
    # @return [String, nil]
    def date_ranges(mystring)
      numbers = mystring.split(SEPARATORS).map { |token| token.to_i }
      return mystring if numbers.count { |number| number > 12 } > 2
      return mystring if numbers.count { |number| number > 31 } > 1

      nil
    end

    # Recognises explicit "no date" wordings.
    # NOTE(review): this helper is not part of the chain in parse.
    #
    # @return [String, nil] "n.d." on a match, otherwise nil
    def no_date(mystring)
      "n.d." if mystring =~ /^n\.?\s?d\.?\s?$|[Uu]ndated|[Uu]nknown|[Nn]o\s[Dd]ate/
    end

    # Trims separator punctuation from both ends and re-parses. Only the
    # leading / trailing runs are removed; punctuation inside the date stays.
    #
    # @return [String, nil] nil when there is nothing to trim
    def random_punc(mystring)
      trimmed = mystring.sub(LEADING_PUNC, "").sub(TRAILING_PUNC, "")
      return nil if trimmed == mystring

      parse(trimmed)
    end

    # Date._parse is doing almost all the work....
    #
    # Joins the positive year / month / day components with "-"; month and day
    # are zero-padded to two digits.
    #
    # @return [String, nil] possibly an empty string when nothing was
    #   recognised; nil when Date._parse rejects the input
    def rdate_parse(mystring_rp)
      parts = Date._parse(mystring_rp)
      fields = [parts[:year], "%02d" % parts[:mon].to_i, "%02d" % parts[:mday].to_i]
      fields.select { |field| field.to_i > 0 }.join("-")
    rescue ArgumentError
      # Newer versions of Date._parse raise on over-long input; treat as undated.
      nil
    end
  end
end
# 295 failed. 6216 passed
# 48 failed. 3382 passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment