Created
September 21, 2012 13:25
-
-
Save monperrus/3761448 to your computer and use it in GitHub Desktop.
Extraction of dates with Selenium 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Extracts a date (optionally with a time of day) from the visible text of a page element.
// Three layouts are tried in order: DD/MM/YYYY, MM/DD/YYYY, YYYY/MM/DD. The first layout
// that matches wins; `date` stays null and `dateFound` stays false when none matches.
// NOTE(review): the driver is never quit in this snippet - confirm the surrounding code
// calls webDriver.quit() once it is done with the browser.
WebDriver webDriver = new FirefoxDriver();
webDriver.navigate().to("http://example.com/some/page");
WebElement elementWithDate = webDriver.findElement(By.cssSelector("#creation-date"));
String text = elementWithDate.getText();

Date date = null;
boolean dateFound = false;
// Every field is initialised explicitly: a trailing "= null" on a multi-variable
// declaration only initialises the last variable.
String year = null, month = null, monthName = null, day = null;
String hour = null, minute = null, second = null, ampm = null;

String regexDelimiter = "[-:\\/.,]";
String regexDay = "((?:[0-2]?\\d{1})|(?:[3][01]{1}))";
// Two capture groups: numeric month OR month name (exactly one of them is non-null on a match).
String regexMonth = "(?:([0]?[1-9]|[1][012])|(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Sept|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?))";
String regexYear = "((?:[1]{1}\\d{1}\\d{1}\\d{1})|(?:[2]{1}\\d{3}))";
// Optional time part; four capture groups: hour, minute, second, am/pm marker.
String regexHourMinuteSecond = "(?:(?:\\s)((?:[0-1][0-9])|(?:[2][0-3])|(?:[0-9])):([0-5][0-9])(?::([0-5][0-9]))?(?:\\s?(am|AM|pm|PM))?)?";
// The match must not be followed directly by another digit.
String regexEndswith = "(?![\\d])";

// DD/MM/YYYY
String regexDateEuropean = regexDay + regexDelimiter + regexMonth + regexDelimiter + regexYear + regexHourMinuteSecond + regexEndswith;
// MM/DD/YYYY
String regexDateAmerican = regexMonth + regexDelimiter + regexDay + regexDelimiter + regexYear + regexHourMinuteSecond + regexEndswith;
// YYYY/MM/DD
String regexDateTechnical = regexYear + regexDelimiter + regexMonth + regexDelimiter + regexDay + regexHourMinuteSecond + regexEndswith;

// The date parts occupy groups 1-4 in a layout-specific order; the time parts are
// always groups 5-8. Each row gives the group index of {day, month, monthName, year}.
String[] datePatterns = { regexDateEuropean, regexDateAmerican, regexDateTechnical };
int[][] dateGroups = {
    { 1, 2, 3, 4 },  // European:  day, month|monthName, year
    { 3, 1, 2, 4 },  // American:  month|monthName, day, year
    { 4, 2, 3, 1 }   // Technical: year, month|monthName, day
};

// see if there are any matches
Matcher m = null;
for (int i = 0; i < datePatterns.length && !dateFound; i++) {
    m = checkDatePattern(datePatterns[i], text);
    if (m.find()) {
        day = m.group(dateGroups[i][0]);
        month = m.group(dateGroups[i][1]);
        monthName = m.group(dateGroups[i][2]);
        year = m.group(dateGroups[i][3]);
        hour = m.group(5);
        minute = m.group(6);
        second = m.group(7);
        ampm = m.group(8);
        dateFound = true;
    }
}

// construct date object if date was found
if (dateFound) {
    String dateFormatPattern = "";
    String dateString = "";
    if (day != null && month != null && year != null) {
        dateFormatPattern = "yyyy MM d";
        dateString = year + " " + month + " " + day;
    } else if (monthName != null) {
        // Every spelling the regex accepts ("Sep", "Sept", "September", ...) starts with the
        // three-letter abbreviation, so normalise to it; "Sept" itself is not parseable.
        dateFormatPattern = "yyyy MMM d";
        dateString = year + " " + monthName.substring(0, 3) + " " + day;
    }
    if (hour != null && minute != null) {
        // "hh" is the 12-hour clock and is only correct together with an am/pm marker;
        // without a marker the captured hour (0-23) is a 24-hour value and needs "HH".
        dateFormatPattern += (ampm != null ? " hh:mm" : " HH:mm");
        dateString += " " + hour + ":" + minute;
        if (second != null) {
            dateFormatPattern += ":ss";
            dateString += ":" + second;
        }
        if (ampm != null) {
            dateFormatPattern += " a";
            dateString += " " + ampm.toUpperCase(Locale.US);
        }
    }
    if (!dateFormatPattern.equals("") && !dateString.equals("")) {
        SimpleDateFormat dateFormat = new SimpleDateFormat(dateFormatPattern.trim(), Locale.US);
        // parse() already returns a Date. It throws the checked java.text.ParseException,
        // which the enclosing method has to declare or handle.
        date = dateFormat.parse(dateString.trim());
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment