Skip to content

Instantly share code, notes, and snippets.

@joshuabambrick
Created June 5, 2017 18:25
Show Gist options
  • Save joshuabambrick/08e3095a166904ebeb16ed88a90cd424 to your computer and use it in GitHub Desktop.
Save joshuabambrick/08e3095a166904ebeb16ed88a90cd424 to your computer and use it in GitHub Desktop.
Date Extraction Pattern Construction
package utils;
import com.google.common.collect.ImmutableList;
import options.GlobalOptionsManager;
import org.jetbrains.annotations.NotNull;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Builds the regular-expression source strings used to recognise dates written as
 * combinations of a day, a month and a year.
 *
 * <p>Each date part is wrapped in a named capturing group (see {@link DatePart}), so a
 * consumer can read the matched text back by group name. The complete list of patterns is
 * computed once, when this class is initialised, and is exposed through
 * {@link #getPatterns()}.
 */
public class DatePartPatterns {
    /** Separator placed between date parts: one or more non-word characters. */
    private static final String SOME_NON_WORD_CHARS = "\\W+";

    /** Marker used when no start/end anchor is requested. */
    private static final String EMPTY_MARKER = "";

    /**
     * Optional text that may follow a day number: an ordinal suffix ("st", "nd", "rd",
     * "th"), optionally followed by " of" or " day of".
     */
    private static final String DAY_SUFFIX_PATTERN = "\\s*((st)|(nd)|(rd)|(th))?(\\s*( day)? of)?";

    /**
     * Day patterns, two-digit form first and single-digit form second.
     *
     * <p>NOTE: the two-digit form accepts every value from 00 to 39; rejecting impossible
     * days is left to whoever interprets the captured text.
     */
    private static final List<String> DAY_PATTERNS = ImmutableList.<String>builder()
            .add(createNamedRegexCapturingGroup("[0-3][0-9]", DatePart.DAY) + DAY_SUFFIX_PATTERN)
            .add(createNamedRegexCapturingGroup("[1-9]", DatePart.DAY) + DAY_SUFFIX_PATTERN)
            .build();

    /** Month patterns: one alternation of month names, one numeric form. */
    private static final List<String> MONTH_PATTERNS = determineInitialMonthPatterns();

    /** Year patterns: a four-digit year starting with 19 or 20. */
    private static final List<String> YEAR_PATTERNS = Collections.singletonList(
            createNamedRegexCapturingGroup("(19|20)\\d{2}", DatePart.YEAR));

    /**
     * Every complete date pattern. Declared after the day, month and year lists because its
     * initialiser reads all three.
     */
    private static final List<String> DATE_PATTERNS = determineInitialDatePatterns();

    /** The parts of a date, each tied to the regex group name that captures it. */
    public enum DatePart {
        DAY("day"),
        MONTH("month"),
        YEAR("year"),
        ;

        private final String captureGroupName;

        DatePart(@NotNull String captureGroupName) {
            this.captureGroupName = captureGroupName;
        }

        /**
         * @return the name of the capturing group that holds this part in the generated
         *         patterns
         */
        @NotNull
        public String getCaptureGroupName() {
            return captureGroupName;
        }
    }

    /**
     * The twelve months, each with its calendar number and the literal spellings that are
     * recognised for it (full name, abbreviation, abbreviation with a trailing dot).
     *
     * <p>Names are stored as plain text, not as regex fragments, so that the same values can
     * be compared against captured text in {@link #matches(String)} and escaped only at the
     * point where a regex is assembled.
     */
    private enum Month {
        JANUARY(1, "January", "Jan", "Jan."),
        FEBRUARY(2, "February", "Feb", "Feb."),
        MARCH(3, "March", "Mar", "Mar."),
        APRIL(4, "April", "Apr", "Apr."),
        MAY(5, "May", "May."),
        JUNE(6, "June", "Jun", "Jun."),
        JULY(7, "July", "Jul", "Jul."),
        AUGUST(8, "August", "Aug", "Aug."),
        SEPTEMBER(9, "September", "Sep", "Sep.", "Sept", "Sept."),
        OCTOBER(10, "October", "Oct", "Oct."),
        NOVEMBER(11, "November", "Nov", "Nov."),
        DECEMBER(12, "December", "Dec", "Dec."),
        ;

        private final int number;
        private final Set<String> names;

        Month(int number, @NotNull String... names) {
            this.number = number;
            // LinkedHashSet keeps declaration order, so iteration is deterministic.
            this.names = Collections.unmodifiableSet(new LinkedHashSet<>(Arrays.asList(names)));
        }

        /** @return the calendar number of this month, 1 for January through 12 for December */
        public int getNumber() {
            return number;
        }

        /** @return an unmodifiable view of the literal spellings of this month */
        @NotNull
        public Set<String> getNames() {
            return names;
        }

        /**
         * @param str text to test
         * @return {@code true} if {@code str} equals one of this month's spellings, ignoring
         *         case
         */
        public boolean matches(@NotNull String str) {
            for (String name : names) {
                if (str.equalsIgnoreCase(name)) {
                    return true;
                }
            }
            return false;
        }
    }

    /** Not instantiable: this class only exposes static members. */
    private DatePartPatterns() {}

    /** Writes every date pattern to standard output, one per line. */
    public static void printAllPatterns() {
        for (String pattern : getPatterns()) {
            System.out.println(pattern);
        }
    }

    /**
     * Translates a month name into its calendar number.
     *
     * @param str a month spelling such as "January", "jan" or "Jan."; compared ignoring case
     * @return the month number (1-12), or {@code null} if {@code str} is not a known spelling
     */
    public static Integer matchMonthString(@NotNull String str) {
        for (Month month : Month.values()) {
            if (month.matches(str)) {
                return month.getNumber();
            }
        }
        return null;
    }

    /**
     * @return the unmodifiable list of date patterns computed when this class was initialised
     */
    @NotNull
    public static List<String> getPatterns() {
        return DATE_PATTERNS;
    }

    /** Wraps {@code pattern} in a capturing group named after {@code datePart}. */
    private static String createNamedRegexCapturingGroup(@NotNull String pattern, @NotNull DatePart datePart) {
        return String.format("(?<%s>%s)", datePart.getCaptureGroupName(), pattern);
    }

    /**
     * Assembles the date patterns for each supported ordering of parts: day-month-year,
     * month-day-year, month-year, day-month, month-day and year alone.
     *
     * <p>When the "enforce complete match" option is enabled, "^" and "$" are handed to
     * {@code PatternJoiner} as the start and end markers; otherwise both markers are empty.
     * How the markers, delimiter and part lists are combined is decided by
     * {@code PatternJoiner.joinPatterns} (not shown here).
     */
    private static List<String> determineInitialDatePatterns() {
        boolean enforceComplete = getEnforceComplete();
        String startMarker = enforceComplete ? "^" : EMPTY_MARKER;
        String endMarker = enforceComplete ? "$" : EMPTY_MARKER;
        String termDelimiter = SOME_NON_WORD_CHARS;

        List<String> datePatterns = new ArrayList<>();
        datePatterns.addAll(PatternJoiner.
                joinPatterns(startMarker, endMarker, termDelimiter, DAY_PATTERNS, MONTH_PATTERNS, YEAR_PATTERNS));
        datePatterns.addAll(PatternJoiner.
                joinPatterns(startMarker, endMarker, termDelimiter, MONTH_PATTERNS, DAY_PATTERNS, YEAR_PATTERNS));
        datePatterns.addAll(PatternJoiner.
                joinPatterns(startMarker, endMarker, termDelimiter, MONTH_PATTERNS, YEAR_PATTERNS));
        datePatterns.addAll(PatternJoiner.
                joinPatterns(startMarker, endMarker, termDelimiter, DAY_PATTERNS, MONTH_PATTERNS));
        datePatterns.addAll(PatternJoiner.
                joinPatterns(startMarker, endMarker, termDelimiter, MONTH_PATTERNS, DAY_PATTERNS));
        datePatterns.addAll(PatternJoiner.
                joinPatterns(startMarker, endMarker, termDelimiter, YEAR_PATTERNS));
        return Collections.unmodifiableList(datePatterns);
    }

    /**
     * Builds the two month patterns: an alternation of every month spelling, and a numeric
     * month (1-12 with an optional leading zero).
     *
     * <p>Regex alternation takes the first branch that matches, so branches are ordered to
     * try longer text first. Otherwise a pattern that ends with the month and is not
     * anchored could capture "Jan" out of "January" or "1" out of "12".
     */
    @NotNull
    private static List<String> determineInitialMonthPatterns() {
        // Longest spelling first; ties broken lexicographically to keep the output stable.
        Comparator<String> longestFirst = (left, right) -> {
            int byLength = Integer.compare(right.length(), left.length());
            return byLength != 0 ? byLength : left.compareTo(right);
        };

        String monthNameAlternation = Arrays.stream(Month.values())
                .flatMap(month -> month.getNames().stream())
                .sorted(longestFirst)
                // The only regex metacharacter in a month spelling is the abbreviation dot.
                .map(name -> name.replace(".", "\\."))
                .collect(Collectors.joining("|"));

        List<String> monthPatterns = new ArrayList<>();
        monthPatterns.add(createNamedRegexCapturingGroup(monthNameAlternation, DatePart.MONTH));
        // Two-digit branch first so that "10".."12" are not captured as "1".
        monthPatterns.add(createNamedRegexCapturingGroup("1[0-2]|0?[1-9]", DatePart.MONTH));
        return Collections.unmodifiableList(monthPatterns);
    }

    /** Reads the "date pattern enforce complete match" switch from the global options. */
    private static boolean getEnforceComplete() {
        return GlobalOptionsManager.get().getDatePatternEnforceCompleteMatch().isEnabled();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment