Skip to content

Instantly share code, notes, and snippets.

@mzelzoghbi
Created May 8, 2016 19:11
Show Gist options
  • Save mzelzoghbi/ad1c14abff7e0702714a3961aa61679f to your computer and use it in GitHub Desktop.
Save mzelzoghbi/ad1c14abff7e0702714a3961aa61679f to your computer and use it in GitHub Desktop.
public class Regex {
/**
* Regular expression pattern to match all IANA top-level domains.
* List accurate as of 2007/06/15. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
* This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
*/
private static final Pattern TOP_LEVEL_DOMAIN_PATTERN
= Pattern.compile(
"((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(biz|b[abdefghijmnorstvwyz])"
+ "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(info|int|i[delmnoqrst])"
+ "|(jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(name|net|n[acefgilopruz])"
+ "|(org|om)"
+ "|(pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw])");
/**
* Regular expression pattern to match RFC 1738 URLs
* List accurate as of 2007/06/15. List taken from:
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
* This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
*/
public static final Pattern WEB_URL_PATTERN
= Pattern.compile(
"((?:(http|https|Http|Https):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+ "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ "(?:" // plus top level domain
+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(?:biz|b[abdefghijmnorstvwyz])"
+ "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(?:edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(?:gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(?:info|int|i[delmnoqrst])"
+ "|(?:jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(?:name|net|n[acefgilopruz])"
+ "|(?:org|om)"
+ "|(?:pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw]))"
+ "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9])))"
+ "|\\.\\.\\."
+ "(?:\\:\\d{1,5})?)" // plus option port number
+ "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ "(?:\\b|$)"); // and finally, a word boundary or end of
// input. This is to stop foo.sure from
// matching as foo.su
private static final Pattern IP_ADDRESS_PATTERN
= Pattern.compile(
"((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9]))");
public static Pattern DOMAIN_NAME_PATTERN
= Pattern.compile(
"(((([a-zA-Z0-9][a-zA-Z0-9\\-]*)*[a-zA-Z0-9]\\.)+"
+ TOP_LEVEL_DOMAIN_PATTERN + ")|"
+ IP_ADDRESS_PATTERN + ")");
public static final Pattern EMAIL_ADDRESS_PATTERN
= Pattern.compile(
"[a-zA-Z0-9\\+\\.\\_\\%\\-]{1,256}" +
"\\@" +
"[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
"(" +
"\\." +
"[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
")+"
);
/**
* This pattern is intended for searching for things that look like they
* might be phone numbers in arbitrary text, not for validating whether
* something is in fact a phone number. It will miss many things that
* are legitimate phone numbers.
* <p/>
* <p> The pattern matches the following:
* <ul>
* <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes
* may follow.
* <li>Optionally, sets of digits in parentheses, separated by spaces, dots, or dashes.
* <li>A string starting and ending with a digit, containing digits, spaces, dots, and/or dashes.
* </ul>
*/
public static final Pattern PHONE_PATTERN
= Pattern.compile( // sdd = space, dot, or dash
"(\\+[0-9]+[\\- \\.]*)?" // +<digits><sdd>*
+ "(\\([0-9]+\\)[\\- \\.]*)?" // (<digits>)<sdd>*
+ "([0-9][0-9\\- \\.][0-9\\- \\.]+[0-9])"); // <digit><digit|sdd>+<digit>
public static final Pattern HASHTAG_PATTERN_OLD
= Pattern.compile("#([\\p{L}\\p{InThai}$A-Za-z0-9_-]+)");
public static final Pattern CASHTAG_PATTERN_OLD
= Pattern.compile("\\$([\\p{L}\\p{InThai}A-Z]+)");
public static final Pattern MENTION_PATTERN_OLD
= Pattern.compile("@([\\p{L}\\p{InThai}A-Za-z0-9_-]+)");
/**
* Convenience method to take all of the non-null matching groups in a
* regex Matcher and return them as a concatenated string.
*
* @param matcher The Matcher object from which grouped text will
* be extracted
* @return A String comprising all of the non-null matched
* groups concatenated together
*/
public static String concatGroups(Matcher matcher) {
StringBuilder b = new StringBuilder();
final int numGroups = matcher.groupCount();
for (int i = 1; i <= numGroups; i++) {
String s = matcher.group(i);
System.err.println("Group(" + i + ") : " + s);
if (s != null) {
b.append(s);
}
}
return b.toString();
}
/**
* Convenience method to return only the digits and plus signs
* in the matching string.
*
* @param matcher The Matcher object from which digits and plus will
* be extracted
* @return A String comprising all of the digits and plus in
* the match
*/
public static String digitsAndPlusOnly(Matcher matcher) {
StringBuilder buffer = new StringBuilder();
String matchingRegion = matcher.group();
for (int i = 0, size = matchingRegion.length(); i < size; i++) {
char character = matchingRegion.charAt(i);
if (character == '+' || Character.isDigit(character)) {
buffer.append(character);
}
}
return buffer.toString();
}
// Twitters own regex patterns
private static final String UNICODE_SPACES = "[" +
"\\u0009-\\u000d" + // # White_Space # Cc [5] <control-0009>..<control-000D>
"\\u0020" + // White_Space # Zs SPACE
"\\u0085" + // White_Space # Cc <control-0085>
"\\u00a0" + // White_Space # Zs NO-BREAK SPACE
"\\u1680" + // White_Space # Zs OGHAM SPACE MARK
"\\u180E" + // White_Space # Zs MONGOLIAN VOWEL SEPARATOR
"\\u2000-\\u200a" + // # White_Space # Zs [11] EN QUAD..HAIR SPACE
"\\u2028" + // White_Space # Zl LINE SEPARATOR
"\\u2029" + // White_Space # Zp PARAGRAPH SEPARATOR
"\\u202F" + // White_Space # Zs NARROW NO-BREAK SPACE
"\\u205F" + // White_Space # Zs MEDIUM MATHEMATICAL SPACE
"\\u3000" + // White_Space # Zs IDEOGRAPHIC SPACE
"]";
private static String LATIN_ACCENTS_CHARS = "\\u00c0-\\u00d6\\u00d8-\\u00f6\\u00f8-\\u00ff" + // Latin-1
"\\u0100-\\u024f" + // Latin Extended A and B
"\\u0253\\u0254\\u0256\\u0257\\u0259\\u025b\\u0263\\u0268\\u026f\\u0272\\u0289\\u028b" + // IPA Extensions
"\\u02bb" + // Hawaiian
"\\u0300-\\u036f" + // Combining diacritics
"\\u1e00-\\u1eff"; // Latin Extended Additional (mostly for Vietnamese)
private static final String HASHTAG_ALPHA_CHARS = "a-z" + LATIN_ACCENTS_CHARS +
"\\u0400-\\u04ff\\u0500-\\u0527" + // Cyrillic
"\\u2de0-\\u2dff\\ua640-\\ua69f" + // Cyrillic Extended A/B
"\\u0591-\\u05bf\\u05c1-\\u05c2\\u05c4-\\u05c5\\u05c7" +
"\\u05d0-\\u05ea\\u05f0-\\u05f4" + // Hebrew
"\\ufb1d-\\ufb28\\ufb2a-\\ufb36\\ufb38-\\ufb3c\\ufb3e\\ufb40-\\ufb41" +
"\\ufb43-\\ufb44\\ufb46-\\ufb4f" + // Hebrew Pres. Forms
"\\u0610-\\u061a\\u0620-\\u065f\\u066e-\\u06d3\\u06d5-\\u06dc" +
"\\u06de-\\u06e8\\u06ea-\\u06ef\\u06fa-\\u06fc\\u06ff" + // Arabic
"\\u0750-\\u077f\\u08a0\\u08a2-\\u08ac\\u08e4-\\u08fe" + // Arabic Supplement and Extended A
"\\ufb50-\\ufbb1\\ufbd3-\\ufd3d\\ufd50-\\ufd8f\\ufd92-\\ufdc7\\ufdf0-\\ufdfb" + // Pres. Forms A
"\\ufe70-\\ufe74\\ufe76-\\ufefc" + // Pres. Forms B
"\\u200c" + // Zero-Width Non-Joiner
"\\u0e01-\\u0e3a\\u0e40-\\u0e4e" + // Thai
"\\u1100-\\u11ff\\u3130-\\u3185\\uA960-\\uA97F\\uAC00-\\uD7AF\\uD7B0-\\uD7FF" + // Hangul (Korean)
"\\p{InHiragana}\\p{InKatakana}" + // Japanese Hiragana and Katakana
"\\p{InCJKUnifiedIdeographs}" + // Japanese Kanji / Chinese Han
"\\u3003\\u3005\\u303b" + // Kanji/Han iteration marks
"\\uff21-\\uff3a\\uff41-\\uff5a" + // full width Alphabet
"\\uff66-\\uff9f" + // half width Katakana
"\\uffa1-\\uffdc"; // half width Hangul (Korean)
private static final String HASHTAG_ALPHA_NUMERIC_CHARS = "0-9\\uff10-\\uff19_" + HASHTAG_ALPHA_CHARS;
private static final String HASHTAG_ALPHA = "[" + HASHTAG_ALPHA_CHARS +"]";
private static final String HASHTAG_ALPHA_NUMERIC = "[" + HASHTAG_ALPHA_NUMERIC_CHARS +"]";
/* URL related hash regex collection */
private static final String URL_VALID_PRECEEDING_CHARS = "(?:[^A-Z0-9@@$##\u202A-\u202E]|^)";
private static final String URL_VALID_CHARS = "[\\p{Alnum}" + LATIN_ACCENTS_CHARS + "]";
private static final String URL_VALID_SUBDOMAIN = "(?>(?:" + URL_VALID_CHARS + "[" + URL_VALID_CHARS + "\\-_]*)?" + URL_VALID_CHARS + "\\.)";
private static final String URL_VALID_DOMAIN_NAME = "(?:(?:" + URL_VALID_CHARS + "[" + URL_VALID_CHARS + "\\-]*)?" + URL_VALID_CHARS + "\\.)";
/* Any non-space, non-punctuation characters. \p{Z} = any kind of whitespace or invisible separator. */
private static final String URL_VALID_UNICODE_CHARS = "[.[^\\p{Punct}\\s\\p{Z}\\p{InGeneralPunctuation}]]";
private static final String URL_VALID_GTLD =
"(?:(?:" +
"academy|accountants|active|actor|aero|agency|airforce|archi|army|arpa|asia|associates|attorney|audio|autos|" +
"axa|bar|bargains|bayern|beer|berlin|best|bid|bike|bio|biz|black|blackfriday|blue|bmw|boutique|brussels|build|" +
"builders|buzz|bzh|cab|camera|camp|cancerresearch|capetown|capital|cards|care|career|careers|cash|cat|catering|" +
"center|ceo|cheap|christmas|church|citic|claims|cleaning|clinic|clothing|club|codes|coffee|college|cologne|com|" +
"community|company|computer|condos|construction|consulting|contractors|cooking|cool|coop|country|credit|" +
"creditcard|cruises|cuisinella|dance|dating|degree|democrat|dental|dentist|desi|diamonds|digital|direct|" +
"directory|discount|dnp|domains|durban|edu|education|email|engineer|engineering|enterprises|equipment|estate|" +
"eus|events|exchange|expert|exposed|fail|farm|feedback|finance|financial|fish|fishing|fitness|flights|florist|" +
"foo|foundation|frogans|fund|furniture|futbol|gal|gallery|gift|gives|glass|global|globo|gmo|gop|gov|graphics|" +
"gratis|green|gripe|guide|guitars|guru|hamburg|haus|hiphop|hiv|holdings|holiday|homes|horse|host|house|" +
"immobilien|industries|info|ink|institute|insure|int|international|investments|jetzt|jobs|joburg|juegos|kaufen|" +
"kim|kitchen|kiwi|koeln|kred|land|lawyer|lease|lgbt|life|lighting|limited|limo|link|loans|london|lotto|luxe|" +
"luxury|maison|management|mango|market|marketing|media|meet|menu|miami|mil|mini|mobi|moda|moe|monash|mortgage|" +
"moscow|motorcycles|museum|nagoya|name|navy|net|neustar|news|nhk|ninja|nyc|okinawa|onl|online|org|organic|ovh|paris|" +
"partners|parts|photo|photography|photos|physio|pics|pictures|pink|place|plumbing|post|press|pro|productions|" +
"properties|pub|qpon|quebec|recipes|red|rehab|reise|reisen|ren|rentals|repair|report|republican|rest|reviews|" +
"rich|rio|rocks|rodeo|ruhr|ryukyu|saarland|schmidt|schule|scot|services|sexy|shiksha|shoes|singles|social|" +
"software|sohu|solar|solutions|soy|space|spiegel|supplies|supply|support|surf|surgery|suzuki|systems|tattoo|" +
"tax|tech|technology|tel|tienda|tips|tirol|today|tokyo|tools|town|toys|trade|training|travel|university|uno|" +
"vacations|vegas|ventures|versicherung|vet|viajes|villas|vision|vlaanderen|vodka|vote|voting|voto|voyage|wang|" +
"watch|webcam|website|wed|wien|wiki|works|wtc|wtf|xxx|xyz|yachts|yokohama|zone|дети|москва|онлайн|орг|сайт|" +
"بازار|شبكة|موقع|संगठन|みんな|世界|中信|中文网|公司|公益|商城|商标|在线|我爱你|政务|机构|游戏|移动|组织机构|网址|网络|集团|삼성" +
")(?=[^\\p{Alnum}@]|$))";
private static final String URL_VALID_CCTLD =
"(?:(?:" +
"ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|" +
"bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|" +
"fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|" +
"io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|" +
"mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|" +
"pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|" +
"sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|" +
"za|zm|zw|мкд|мон|рф|срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|عمان|" +
"فلسطين|قطر|مصر|مليسيا|پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|" +
"台灣|新加坡|香港|한국" +
")(?=[^\\p{Alnum}@]|$))";
private static final String URL_PUNYCODE = "(?:xn--[0-9a-z]+)";
private static final String SPECIAL_URL_VALID_CCTLD = "(?:(?:" + "co|tv" + ")(?=[^\\p{Alnum}@]|$))";
private static final String URL_VALID_DOMAIN =
"(?:" + // subdomains + domain + TLD
URL_VALID_SUBDOMAIN + "+" + URL_VALID_DOMAIN_NAME + // e.g. www.twitter.com, foo.co.jp, bar.co.uk
"(?:" + URL_VALID_GTLD + "|" + URL_VALID_CCTLD + "|" + URL_PUNYCODE + ")" +
")" +
"|(?:" + // domain + gTLD + some ccTLD
URL_VALID_DOMAIN_NAME + // e.g. twitter.com
"(?:" + URL_VALID_GTLD + "|" + URL_PUNYCODE + "|" + SPECIAL_URL_VALID_CCTLD + ")" +
")" +
"|(?:" + "(?<=https?://)" +
"(?:" +
"(?:" + URL_VALID_DOMAIN_NAME + URL_VALID_CCTLD + ")" + // protocol + domain + ccTLD
"|(?:" +
URL_VALID_UNICODE_CHARS + "+\\." + // protocol + unicode domain + TLD
"(?:" + URL_VALID_GTLD + "|" + URL_VALID_CCTLD + ")" +
")" +
")" +
")" +
"|(?:" + // domain + ccTLD + '/'
URL_VALID_DOMAIN_NAME + URL_VALID_CCTLD + "(?=/)" + // e.g. t.co/
")";
private static final String URL_VALID_PORT_NUMBER = "[0-9]++";
private static final String URL_VALID_GENERAL_PATH_CHARS = "[a-z0-9!\\*';:=\\+,.\\$/%#\\[\\]\\-_~\\|&@" + LATIN_ACCENTS_CHARS + "]";
/** Allow URL paths to contain up to two nested levels of balanced parens
* 1. Used in Wikipedia URLs like /Primer_(film)
* 2. Used in IIS sessions like /S(dfd346)/
* 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
**/
private static final String URL_BALANCED_PARENS = "\\(" +
"(?:" +
URL_VALID_GENERAL_PATH_CHARS + "+" +
"|" +
// allow one nested level of balanced parentheses
"(?:" +
URL_VALID_GENERAL_PATH_CHARS + "*" +
"\\(" +
URL_VALID_GENERAL_PATH_CHARS + "+" +
"\\)" +
URL_VALID_GENERAL_PATH_CHARS + "*" +
")" +
")" +
"\\)";
/** Valid end-of-path characters (so /foo. does not gobble the period).
* 2. Allow =&# for empty URL parameters and other URL-join artifacts
**/
private static final String URL_VALID_PATH_ENDING_CHARS = "[a-z0-9=_#/\\-\\+" + LATIN_ACCENTS_CHARS + "]|(?:" + URL_BALANCED_PARENS +")";
private static final String URL_VALID_PATH = "(?:" +
"(?:" +
URL_VALID_GENERAL_PATH_CHARS + "*" +
"(?:" + URL_BALANCED_PARENS + URL_VALID_GENERAL_PATH_CHARS + "*)*" +
URL_VALID_PATH_ENDING_CHARS +
")|(?:@" + URL_VALID_GENERAL_PATH_CHARS + "+/)" +
")";
private static final String URL_VALID_URL_QUERY_CHARS = "[a-z0-9!?\\*'\\(\\);:&=\\+\\$/%#\\[\\]\\-_\\.,~\\|@]";
private static final String URL_VALID_URL_QUERY_ENDING_CHARS = "[a-z0-9_&=#/]";
private static final String VALID_URL_PATTERN_STRING =
"(" + // £8.87 total match
"(" + // £26.61 URL
"(https?://)?" + // £35.47 Protocol (optional)
"(" + URL_VALID_DOMAIN + ")" + // £44.34 Domain(s)
"(?::(" + URL_VALID_PORT_NUMBER +"))?" + // £53.21 Port number (optional)
"(/" +
URL_VALID_PATH + "*+" +
")?" + // £62.08 URL Path and anchor
"(\\?" + URL_VALID_URL_QUERY_CHARS + "*" + // £70.95 Query String
URL_VALID_URL_QUERY_ENDING_CHARS + ")?" +
")" +
")";
private static String AT_SIGNS_CHARS = "@\uFF20";
private static final String DOLLAR_SIGN_CHAR = "\\$";
private static final String CASHTAG = "[a-z]{1,6}(?:[._][a-z]{1,2})?";
/* Begin public constants */
public static final Pattern HASHTAG_PATTERN = Pattern.compile("(#|\uFF03)(" + HASHTAG_ALPHA_NUMERIC + "*" + HASHTAG_ALPHA + HASHTAG_ALPHA_NUMERIC + "*)", Pattern.CASE_INSENSITIVE);
public static final int VALID_HASHTAG_GROUP_BEFORE = 1;
public static final int VALID_HASHTAG_GROUP_HASH = 2;
public static final int VALID_HASHTAG_GROUP_TAG = 3;
public static final Pattern INVALID_HASHTAG_MATCH_END = Pattern.compile("^(?:[##]|://)");
public static final Pattern RTL_CHARACTERS = Pattern.compile("[\u0600-\u06FF\u0750-\u077F\u0590-\u05FF\uFE70-\uFEFF]");
public static final Pattern AT_SIGNS = Pattern.compile("[" + AT_SIGNS_CHARS + "]");
public static final Pattern MENTION_PATTERN = Pattern.compile("(" + AT_SIGNS + "+)([a-z0-9_]{1,20})(/[a-z][a-z0-9_\\-]{0,24})?", Pattern.CASE_INSENSITIVE);
public static final int VALID_MENTION_OR_LIST_GROUP_BEFORE = 1;
public static final int VALID_MENTION_OR_LIST_GROUP_AT = 2;
public static final int VALID_MENTION_OR_LIST_GROUP_USERNAME = 3;
public static final int VALID_MENTION_OR_LIST_GROUP_LIST = 4;
public static final Pattern VALID_REPLY = Pattern.compile("^(?:" + UNICODE_SPACES + ")*" + AT_SIGNS + "([a-z0-9_]{1,20})", Pattern.CASE_INSENSITIVE);
public static final int VALID_REPLY_GROUP_USERNAME = 1;
public static final Pattern INVALID_MENTION_MATCH_END = Pattern.compile("^(?:[" + AT_SIGNS_CHARS + LATIN_ACCENTS_CHARS + "]|://)");
public static final Pattern VALID_URL = Pattern.compile(VALID_URL_PATTERN_STRING, Pattern.CASE_INSENSITIVE);
public static final int VALID_URL_GROUP_ALL = 1;
public static final int VALID_URL_GROUP_BEFORE = 2;
public static final int VALID_URL_GROUP_URL = 3;
public static final int VALID_URL_GROUP_PROTOCOL = 4;
public static final int VALID_URL_GROUP_DOMAIN = 5;
public static final int VALID_URL_GROUP_PORT = 6;
public static final int VALID_URL_GROUP_PATH = 7;
public static final int VALID_URL_GROUP_QUERY_STRING = 8;
public static final Pattern VALID_TCO_URL = Pattern.compile("^https?:\\/\\/t\\.co\\/[a-z0-9]+", Pattern.CASE_INSENSITIVE);
public static final Pattern INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN = Pattern.compile("[-_./]$");
public static final Pattern CASHTAG_PATTERN = Pattern.compile("(" + DOLLAR_SIGN_CHAR + ")(" + CASHTAG + ")" +"(?=$|\\s|\\p{Punct})", Pattern.CASE_INSENSITIVE);
public static final int VALID_CASHTAG_GROUP_BEFORE = 1;
public static final int VALID_CASHTAG_GROUP_DOLLAR = 2;
public static final int VALID_CASHTAG_GROUP_CASHTAG = 3;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment