Gerst20051/email_regex.js

## email_regex.js
// Validation-rule expression: evaluates to `true` when `email` is non-empty and
// matches "no-whitespace/no-@ text, @, same, dot, same"; otherwise evaluates to
// the message string 'Invalid Email'.
email && /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email) ? true : 'Invalid Email';

// Fixture table of [label, address, note] rows: label 0 marks an address that
// should be rejected, label 1 one that should be accepted.
//
// Backslashes that belong to an address are written as `\\`. An unescaped `\k`,
// `\a` or `\ ` inside a string literal is an identity escape, so the backslash
// would silently vanish from the value and the row would no longer exercise the
// case its note describes. The sample output recorded in the comments after the
// loops below was captured before this escaping was corrected and therefore
// shows those addresses without their backslashes.
const emails = [
  [0, ' simple@example.com', 'a valid address with a leading space'],
  [0, '.email@test.com', 'a . is not allowed at the beginning and/or end'],
  [0, '1234567890123456789012345678901234567890123456789012345678901234+x@example.com', 'too long'],
  [0, '@@@', 'only one @ is allowed outside quotation marks'],
  [0, 'a"b(c)d,e:f;g<h>i[j\\k]l@example.com', 'none of the special characters in this local-part are allowed outside quotation marks'],
  [0, 'A@b@c@example.com', 'only one @ is allowed outside quotation marks'],
  [0, 'Abc.example.com', 'no @ character'],
  [0, 'email@test.com.', 'a . is not allowed at the beginning and/or end'],
  [0, 'john..doe@example.com', 'double dot before @'],
  [0, 'john.doe@example..com', 'double dot after @'],
  [0, 'john@aol...com', 'not valid due to consecutive dots'],
  [0, 'just"not"right@example.com', 'quoted strings must be dot separated or the only element making up the local-part'],
  [0, 'simple@example.com ', 'a valid address with a trailing space'],
  [0, 'test', ''],
  [0, 'this is"not\\allowed@example.com', 'spaces, quotes, and backslashes may only exist when within quoted strings and preceded by a backslash'],
  [0, 'this\\ still\\"not\\\\allowed@example.com', 'even if escaped (preceded by a backslash), spaces, quotes, and backslashes must still be contained by quotes'],
  [1, '" "@example.org', 'space between the quotes'],
  [1, '"()<>[]:,;@\\\\\\"!#$%&\'-/=?^_`{}| ~.a"@example.org', ''],
  [1, '"John..Doe"@example.com', 'dot . is allowed provided that it is not the first or last character unless quoted, and provided also that it does not appear consecutively unless quoted'],
  [1, '"very.(),:;<>[]\\".VERY.\\"very@\\\\ \\"very\\".unusual"@strange.example.com', ''],
  [1, '#!$%&\'*+-/=?^_`{}|~@example.org', ''],
  [1, '(comment)john.smith@example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
  [1, 'admin@mailserver1', 'local domain name with no TLD, although ICANN highly discourages dotless email addresses'],
  [1, 'disposable.style.email.with+symbol@example.com', ''],
  [1, 'example-indeed@strange-example.com', ''],
  [1, 'example@localhost', 'sent from localhost'],
  [1, 'example@s.solutions', 'https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains'],
  [1, 'fully-qualified-domain@example.com', ''],
  [1, 'joeuser+tag@example.com', 'subaddressing, plus addressing, or tagged addressing'],
  [1, 'john.smith(comment)@example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
  [1, 'john.smith@(comment)example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
  [1, 'john.smith@example.com(comment)', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
  [1, 'jsmith@[192.168.2.1]', 'the domain may be an ip address literal surrounded by square brackets []'],
  [1, 'jsmith@[IPv6:2001:db8::1]', 'the domain may be an ip address literal surrounded by square brackets []'],
  [1, 'other.email-with-dash@example.com', ''],
  [1, 'Pelé@example.com', 'latin alphabet with diacritics'],
  [1, 'simple@example.com', ''],
  [1, 'user.name+tag+sorting@example.com', 'will go to user.name@example.com inbox'],
  [1, 'user@[2001:DB8::1]', ''],
  [1, 'user@localserver', ''],
  [1, 'very.common@example.com', ''],
  [1, 'x@example.com', 'one-letter local-part'],
  [1, 'δοκιμή@παράδειγμα.δοκιμή', 'greek alphabet'],
  [1, 'θσερ@εχαμπλε.ψομ', ''],
  [1, 'чебурашка@ящик-с-апельсинами.рф', 'cyrillic characters'],
  [1, 'юзер@екзампл.ком', ''],
  [1, 'राम@+मोहन.ईन्फो', ''],
  [1, 'संपर्क@डाटामेल.भारत', 'devanagari characters'],
  [1, '二ノ宮@黒川.日本', 'japanese characters'],
  [1, '伊昭傑@郵件.商務', ''],
  [1, '我買@屋企.香港', 'traditional chinese characters'],
];

// For every fixture row, print the address followed by the result of the
// charset check and then the result of the loose "x@y.z" shape check.
emails.forEach((row) => {
  const address = row[1];
  const fitsCharset = /^[\u0000-\u00ff]+$/.test(address);
  const hasLooseShape = /^.+@.+\..+$/.test(address);
  console.log(`${address} => ${fitsCharset} ${hasLooseShape}`);
});

// Print "address => label" for the rows that pass both the charset check and
// the loose shape check, in fixture order.
emails
  .filter((row) => /^[\u0000-\u00ff]+$/.test(row[1]) && /^.+@.+\..+$/.test(row[1]))
  .forEach(([label, address]) => {
    console.log(`${address} => ${label}`);
  });

//  simple@example.com => 0
// .email@test.com => 0
// 1234567890123456789012345678901234567890123456789012345678901234+x@example.com => 0
// a"b(c)d,e:f;g<h>i[jk]l@example.com => 0
// A@b@c@example.com => 0
// email@test.com. => 0
// john..doe@example.com => 0
// john.doe@example..com => 0
// john@aol...com => 0
// just"not"right@example.com => 0
// simple@example.com  => 0
// this is"notallowed@example.com => 0
// this still"not\allowed@example.com => 0
// " "@example.org => 1
// "()<>[]:,;@\"!#$%&'-/=?^_`{}| ~.a"@example.org => 1
// "John..Doe"@example.com => 1
// "very.(),:;<>[]".VERY."very@\ "very".unusual"@strange.example.com => 1
// #!$%&'*+-/=?^_`{}|~@example.org => 1
// (comment)john.smith@example.com => 1
// disposable.style.email.with+symbol@example.com => 1
// example-indeed@strange-example.com => 1
// example@s.solutions => 1
// fully-qualified-domain@example.com => 1
// joeuser+tag@example.com => 1
// john.smith(comment)@example.com => 1
// john.smith@(comment)example.com => 1
// john.smith@example.com(comment) => 1
// jsmith@[192.168.2.1] => 1
// other.email-with-dash@example.com => 1
// Pelé@example.com => 1
// simple@example.com => 1
// user.name+tag+sorting@example.com => 1
// very.common@example.com => 1
// x@example.com => 1

// Print "address => label" for the rows that fail at least one of the two
// checks (charset, loose shape), in fixture order.
for (const [label, address] of emails) {
  const passesBoth = /^[\u0000-\u00ff]+$/.test(address) && /^.+@.+\..+$/.test(address);
  if (passesBoth) {
    continue;
  }
  console.log(`${address} => ${label}`);
}

// @@@ => 0
// Abc.example.com => 0
// test => 0
// admin@mailserver1 => 1
// example@localhost => 1
// jsmith@[IPv6:2001:db8::1] => 1
// user@[2001:DB8::1] => 1
// user@localserver => 1
// δοκιμή@παράδειγμα.δοκιμή => 1
// θσερ@εχαμπλε.ψομ => 1
// чебурашка@ящик-с-апельсинами.рф => 1
// юзер@екзампл.ком => 1
// राम@+मोहन.ईन्फो => 1
// संपर्क@डाटामेल.भारत => 1
// 二ノ宮@黒川.日本 => 1
// 伊昭傑@郵件.商務 => 1
// 我買@屋企.香港 => 1

## email_regex.sql
-- Lists every row whose regex verdict disagrees with its hand-assigned `valid`
-- flag: rows flagged valid that the pattern rejects, and rows flagged invalid
-- that the pattern accepts.
--
-- The dot is written as `\\.`: MySQL processes backslash escapes in the string
-- literal before the regex engine sees it, so a single `\.` would arrive as a
-- bare `.` and match any character instead of a literal period.
SELECT * FROM email WHERE (
  (emailaddress NOT REGEXP '^[^[:space:]@]+@[^[:space:]@]+\\.[^[:space:]@]+$' AND valid = True)
OR
  (emailaddress REGEXP '^[^[:space:]@]+@[^[:space:]@]+\\.[^[:space:]@]+$' AND valid = False)
);

## is-valid-email-address.js
/** Longest whole address accepted. */
const MAX_EMAIL_ADDRESS_LENGTH = 254;
/** Longest local part (text before the separating @) accepted. */
const MAX_LOCAL_PART_LENGTH = 64;
/** Longest single dot-separated part of the domain accepted. */
const MAX_DOMAIN_LABEL_LENGTH = 63;

/** Every character must fall in U+0000..U+00FF. */
const LATIN1_ONLY_PATTERN = /^[\u0000-\u00ff]+$/;
/** Something, an @, something, a dot, something. */
const LOOSE_ADDRESS_PATTERN = /^.+@.+\..+$/;
/** A double-quoted run in which a backslash escapes the following character. */
const QUOTED_STRING_PATTERN = /"(?:[^"\\]|\\.)*"/g;
const WHITESPACE_PATTERN = /\s/;

/**
 * Pragmatic (not RFC-complete) email address check.
 *
 * An address is accepted when it
 * - contains only characters in U+0000..U+00FF,
 * - has the loose shape `x@y.z`,
 * - is at most 254 characters long, with a local part of at most 64 characters
 *   and domain parts of 1 to 63 characters each,
 * - has no whitespace and no consecutive periods outside of quoted strings,
 * - does not begin or end with a period.
 *
 * @param email - The candidate address.
 * @returns `true` when every check passes, otherwise `false`.
 */
export const isValidEmailAddress = (email: string): boolean => {
  // NOTES:
  // - Limiting The Character Set To Match Our Database Column Encoding / Collation [cp1252 / latin1]
  // - Don't Allow Dotless Domains (Allowed In RFC But Prohibited By ICANN) [Could Be An IPv6 Address]
  // - Quoted Strings Are Discouraged In RFC 5321 But Not Prohibited
  // - Consecutive Dots Before The @ Is Invalid According To RFC But Valid And Ignored By Gmail
  if (!LATIN1_ONLY_PATTERN.test(email) || !LOOSE_ADDRESS_PATTERN.test(email)) {
    return false;
  }
  if (email.length > MAX_EMAIL_ADDRESS_LENGTH) {
    return false;
  }

  // Split on the LAST @: a quoted local part may itself contain @ characters.
  // The loose pattern above guarantees at least one @ with text before it.
  const separatorIndex = email.lastIndexOf('@');
  const localPart = email.slice(0, separatorIndex);
  const domain = email.slice(separatorIndex + 1);

  if (localPart.length > MAX_LOCAL_PART_LENGTH) {
    return false;
  }

  // Collapse each quoted string to an empty pair of quotes so that whitespace
  // and periods inside quotes are ignored by the checks below, while text on
  // either side of the quotes is not accidentally joined together.
  const localPartOutsideQuotes = localPart.replace(QUOTED_STRING_PATTERN, '""');

  if (WHITESPACE_PATTERN.test(localPartOutsideQuotes) || WHITESPACE_PATTERN.test(domain)) {
    return false;
  }
  if (localPart.startsWith('.') || localPartOutsideQuotes.includes('..')) {
    return false;
  }

  // An empty domain part means a leading, trailing or doubled period (or no
  // domain at all), so one pass covers those cases and the length limit.
  return domain
    .split('.')
    .every((label) => label.length >= 1 && label.length <= MAX_DOMAIN_LABEL_LENGTH);
};

## mysql_test_schema.sql
-- Lists every row whose regex verdict disagrees with its hand-assigned `valid`
-- flag: rows flagged valid that the pattern rejects, and rows flagged invalid
-- that the pattern accepts.
--
-- The dot is written as `\\.`: MySQL processes backslash escapes in the string
-- literal before the regex engine sees it, so a single `\.` would arrive as a
-- bare `.` and match any character instead of a literal period.
SELECT * FROM email WHERE (
  (emailaddress NOT REGEXP '^[^[:space:]@]+@[^[:space:]@]+\\.[^[:space:]@]+$' AND valid = True)
OR
  (emailaddress REGEXP '^[^[:space:]@]+@[^[:space:]@]+\\.[^[:space:]@]+$' AND valid = False)
);

-- Fixture table: one row per sample address with its hand-assigned validity
-- flag and an optional explanatory note.
CREATE TABLE email (
  id int(11) unsigned NOT NULL AUTO_INCREMENT,
  -- Expected verdict for the address (1 = should be accepted, 0 = should be rejected).
  valid boolean DEFAULT NULL,
  -- Sized to 254 characters, the maximum address length these notes work to,
  -- so that a maximum-length address can be stored without truncation.
  emailaddress varchar(254) DEFAULT NULL,
  notes text,
  PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Sample addresses with their expected verdict (1 = valid, 0 = invalid).
--
-- Backslashes that belong to an address are written as `\\`. MySQL treats a
-- backslash in a string literal as an escape character and drops it before an
-- ordinary character (`\k` is stored as `k`), so an unescaped backslash would
-- never reach the table and the row would not exercise the case in its note.
INSERT INTO email (valid, emailaddress, notes)
VALUES
(0, ' simple@example.com', 'a valid address with a leading space'),
(0, '.email@test.com', 'a . is not allowed at the beginning and/or end'),
(0, '1234567890123456789012345678901234567890123456789012345678901234+x@example.com', 'too long'),
(0, '@@@', 'only one @ is allowed outside quotation marks'),
(0, 'a"b(c)d,e:f;g<h>i[j\\k]l@example.com', 'none of the special characters in this local-part are allowed outside quotation marks'),
(0, 'A@b@c@example.com', 'only one @ is allowed outside quotation marks'),
(0, 'Abc.example.com', 'no @ character'),
(0, 'email@test.com.', 'a . is not allowed at the beginning and/or end'),
(0, 'john..doe@example.com', 'double dot before @'),
(0, 'john.doe@example..com', 'double dot after @'),
(0, 'john@aol...com', 'not valid due to consecutive dots'),
(0, 'just"not"right@example.com', 'quoted strings must be dot separated or the only element making up the local-part'),
(0, 'simple@example.com ', 'a valid address with a trailing space'),
(0, 'test', ''),
(0, 'this is"not\\allowed@example.com', 'spaces, quotes, and backslashes may only exist when within quoted strings and preceded by a backslash'),
(0, 'this\\ still\\"not\\\\allowed@example.com', 'even if escaped (preceded by a backslash), spaces, quotes, and backslashes must still be contained by quotes'),
(1, '" "@example.org', 'space between the quotes'),
(1, '"()<>[]:,;@\\\\\\"!#$%&\'-/=?^_`{}| ~.a"@example.org', ''),
(1, '"John..Doe"@example.com', 'dot . is allowed provided that it is not the first or last character unless quoted, and provided also that it does not appear consecutively unless quoted'),
(1, '"very.(),:;<>[]\\".VERY.\\"very@\\\\ \\"very\\".unusual"@strange.example.com', ''),
(1, '#!$%&\'*+-/=?^_`{}|~@example.org', ''),
(1, '(comment)john.smith@example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'),
(1, 'admin@mailserver1', 'local domain name with no TLD, although ICANN highly discourages dotless email addresses'),
(1, 'disposable.style.email.with+symbol@example.com', ''),
(1, 'example-indeed@strange-example.com', ''),
(1, 'example@localhost', 'sent from localhost'),
(1, 'example@s.solutions', 'https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains'),
(1, 'fully-qualified-domain@example.com', ''),
(1, 'joeuser+tag@example.com', 'subaddressing, plus addressing, or tagged addressing'),
(1, 'john.smith(comment)@example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'),
(1, 'john.smith@(comment)example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'),
(1, 'john.smith@example.com(comment)', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'),
(1, 'jsmith@[192.168.2.1]', 'the domain may be an ip address literal surrounded by square brackets []'),
(1, 'jsmith@[IPv6:2001:db8::1]', 'the domain may be an ip address literal surrounded by square brackets []'),
(1, 'other.email-with-dash@example.com', ''),
(1, 'Pelé@example.com', 'latin alphabet with diacritics'),
(1, 'simple@example.com', ''),
(1, 'user.name+tag+sorting@example.com', 'will go to user.name@example.com inbox'),
(1, 'user@[2001:DB8::1]', ''),
(1, 'user@localserver', ''),
(1, 'very.common@example.com', ''),
(1, 'x@example.com', 'one-letter local-part'),
(1, 'δοκιμή@παράδειγμα.δοκιμή', 'greek alphabet'),
(1, 'θσερ@εχαμπλε.ψομ', ''),
(1, 'чебурашка@ящик-с-апельсинами.рф', 'cyrillic characters'),
(1, 'юзер@екзампл.ком', ''),
(1, 'राम@+मोहन.ईन्फो', ''),
(1, 'संपर्क@डाटामेल.भारत', 'devanagari characters'),
(1, '二ノ宮@黒川.日本', 'japanese characters'),
(1, '伊昭傑@郵件.商務', ''),
(1, '我買@屋企.香港', 'traditional chinese characters');

## zreadme.md

      
    Raw
  

              zreadme.md
            
          
    Notes

The RFC allows almost everything, which is why /.+@.+/ is the only practical choice for a (simple) regex.
Most regular expressions do not cope with comments in the email address. The RFC allows comments to be arbitrarily nested. A single regular expression cannot cope with this. The Perl module pre-processes email addresses to remove comments before applying the mail regular expression.
HISTORICAL NOTE: Several of the mechanisms described in this set of documents may seem somewhat strange or even baroque at first reading. In particular, compatibility was always favored over elegance.
There is no point in trying to work out if an email address is ‘valid’. A user is far more likely to enter a wrong and valid email address than they are to enter an invalid one. Therefore, you are better off spending your time doing literally any other thing than trying to validate email addresses.
One approach could be to reduce misspellings using something like https://github.com/mailcheck/mailcheck.
HTML5 Valid Email

https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
It's been said that it's impossible to parse email addresses using regular expressions alone. This is somewhat true. If you allow comments in email addresses, then nested comments cannot be matched with a single regexp - a simple loop applying a reducing regexp first is needed. Aside from that, the library (https://code.iamcal.com/php/rfc822/) uses some post-match checks instead of rolling everything into one regexp. This is not because it wouldn't be possible, but because it would make it huge - the number of IPv6 permutations alone would probably double the size. Aside from the practicality, it seems entirely possible to boil it down to a single regexp. However, the one used for HTML5 is not even close...
The requirement is a willful violation of RFC 5322, which defines a syntax for e-mail addresses that is simultaneously too strict (before the "@" character), too vague (after the "@" character), and too lax (allowing comments, whitespace characters, and quoted strings in manners unfamiliar to most users) to be of practical use here.
The following JavaScript and Perl compatible regular expression is an implementation of that definition.
/^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/

Email Rules (Not Comprehensive)


A . is not required. A TLD can have email addresses, or there could be an IPv6 address.

RFCs are not the end of the story: ICANN does not allow 'dotless' domains any more.

https://www.icann.org/news/announcement-2013-08-30-en (New gTLD Dotless Domain Names Prohibited)


The maximum length for an email address is 254 characters.

The local part (before the @) is limited to 64 characters, and each part of the domain name is limited to 63 characters. There's no direct limit on the number of subdomains, but the maximum length of an email address that can be handled by SMTP is 254 characters. So with a single-character local part, a two-letter top-level domain and single-character sub-domains, 125 is the maximum number of sub-domains.


The local-part of the e-mail address may use any of these ASCII characters:

Uppercase and lowercase English letters (a-z, A-Z)
Digits 0 to 9
Characters ! # $ % & ' * + - / = ? ^ _ ` { | } ~
Character . (dot, period, full stop) provided that it is not the first or last character, and provided also that it does not appear two or more times consecutively.

Additionally, quoted-strings (ie: "John Doe"@example.com) are permitted, thus allowing characters that would otherwise be prohibited, however they do not appear in common practice. RFC 5321 also warns that "a host that expects to receive mail SHOULD avoid defining mailboxes where the Local-part requires (or uses) the Quoted-string form".
Things To Consider


Gmail ignores dots in the part before the @, so if your email is test@gmail.com you can send emails to test.@gmail.com or test....@gmail.com. Both of those addresses are invalid according to the RFC, but valid in the real world.

Resources


http://emailregex.com/
http://emailregex.com/email-validation-summary/
https://www.regular-expressions.info/email.html
https://en.wikipedia.org/wiki/International_email
https://en.wikipedia.org/wiki/Email_address#Examples
https://en.wikibooks.org/wiki/JavaScript/Best_practices#Email_validation
https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
http://www.orsn.org/en/tech/tld/
http://data.iana.org/TLD/tlds-alpha-by-domain.txt
https://tools.ietf.org/html/rfc6530 (Overview and Framework for Internationalized Email)
https://stackoverflow.com/questions/46155/how-to-validate-an-email-address-in-javascript
https://stackoverflow.com/questions/201323/how-to-validate-an-email-address-using-a-regular-expression/201378#201378
https://stackoverflow.com/questions/201323/how-to-validate-an-email-address-using-a-regular-expression/1917982#1917982
https://stackoverflow.com/questions/760150/can-an-email-address-contain-international-non-english-characters/31066998#31066998
https://stackoverflow.com/questions/2049502/what-characters-are-allowed-in-an-email-address/2049510
https://stackoverflow.com/questions/24973086/are-comments-allowed-in-email-address-domain-part
https://superuser.com/questions/958156/what-is-the-purpose-of-allowing-comments-inside-email-addresses
https://fightingforalostcause.net/content/misc/2006/compare-email-regex.php
http://thedailywtf.com/articles/Validating_Email_Addresses
https://github.com/manishsaraan/email-validator/blob/master/index.js (RegEx + Function)
https://isemail.info/
http://www.dominicsayers.com/isemail/
https://github.com/dominicsayers/isemail
https://code.iamcal.com/php/rfc822/
https://code.iamcal.com/php/rfc822/demo.php
https://github.com/iamcal/rfc822/blob/master/rfc822.php
https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
https://code.iamcal.com/php/rfc822/full_regexp.txt
https://www.npmjs.com/package/isemail
http://sphinx.mythic-beasts.com/~pdw/cgi-bin/emailvalidate
http://www.ex-parrot.com/~pdw/Mail-RFC822-Address.html
w3c/html#538
w3c/html#845
https://www.w3.org/Bugs/Public/show_bug.cgi?id=15489
https://www.icann.org/news/announcement-2013-08-30-en
https://tools.ietf.org/html/rfc6531
https://shkspr.mobi/blog/2014/01/poor-idn-support-from-major-webmail-providers/
https://shkspr.mobi/blog/2016/09/why-cant-you-send-email-to-a-chinese-address/
https://hackernoon.com/the-100-correct-way-to-validate-email-addresses-7c4818f24643
https://hackernoon.com/how-to-reduce-incorrect-email-addresses-df3b70cb15a9
http://blog.gerv.net/2011/05/html5_email_address_regexp/
https://rgxdb.com/r/1JWKZ0PW
https://jsfiddle.net/davidg707/835bxzas/
https://github.com/mailcheck/mailcheck
https://uasg.tech/wp-content/uploads/2017/04/Unleashing-the-Power-of-All-Domains-White-Paper.pdf
http://www.potaroo.net/reports/Universal-Acceptance/UA-Report.pdf
https://regexr.com/3dnsr
https://www.youtube.com/watch?v=JENdgiAPD6c
https://www.youtube.com/watch?v=4s9IjkMAmns
http://jsfiddle.net/gerst20051/y1puhfmk/
https://proofy.io/

Results


id
valid
emailaddress
notes


2
false
.email@test.com
a . is not allowed at the beginning and/or end


3
false
1234567890123456789012345678901234567890123456789012345678901234+x@example.com
too long


5
false
a"b(c)d,e:f;g<h>i[jk]l@example.com
none of the special characters in this local-part are allowed outside quotation marks


8
false
email@test.com.
a . is not allowed at the beginning and/or end


9
false
john..doe@example.com
double dot before @


10
false
john.doe@example..com
double dot after @


11
false
john@aol...com
not valid due to consecutive dots


12
false
just"not"right@example.com
quoted strings must be dot separated or the only element making up the local-part


16
true
"()<>[]:,;@\"!#$%&'-/=?^_`{}| ~.a"@example.org


18
true
"very.(),:;<>[]".VERY."very@\ "very".unusual"@strange.example.com


## zsolution.md

      
    Raw
  

              zsolution.md
            
          
    Solution

Key Points


Email validation is messy.
Almost anything is a valid email.

Database Column

As far as I can see, latin1 was the default character set in pre-multibyte times. MySQL 4.0 (and earlier versions) only supported what amounted to a combined notion of the character set and collation with single-byte character encodings, which was specified at the server level. The default was latin1, which corresponds to a character set of latin1 and collation of latin1_swedish_ci in MySQL 4.1.
MySQL's latin1 is the same as the Windows cp1252 character set. This means it is the same as the official ISO 8859-1 or IANA (Internet Assigned Numbers Authority) latin1, except that IANA latin1 treats the code points between 0x80 and 0x9f as “undefined,” whereas cp1252, and therefore MySQL's latin1, assign characters for those positions. For example, 0x80 is the Euro sign. For the “undefined” entries in cp1252, MySQL translates 0x81 to Unicode 0x0081, 0x8d to 0x008d, 0x8f to 0x008f, 0x90 to 0x0090, and 0x9d to 0x009d.
https://stackoverflow.com/questions/3936059/why-does-mysql-use-latin1-swedish-ci-as-the-default
https://stackoverflow.com/questions/6769901/why-is-mysqls-default-collation-latin1-swedish-ci
https://stackoverflow.com/questions/7048745/what-is-the-difference-between-utf-8-and-iso-8859-1
https://www.fileformat.info/info/charset/ISO-8859-1/list.htm
Encoding: cp1252 West European (latin1)
Collation: latin1_swedish_ci
Regex Options:

Restrict To Database Column Charset

/^[\u0000-\u00ff]+$/


Check For @ (Restrict To Database Column Charset)

/^[\u0000-\u00ff]+@[\u0000-\u00ff]+\.[\u0000-\u00ff]+$/


Check For @ (Surrounded By Anything)

/^.+@.+$/
/^.+@.+\..+$/


Check For @ (More Restrictive)

/^[^\s@]+@[^\s@]+\.[^\s@]+$/


HTML5 Implementation (Came Across Multiple Versions)

/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
/^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$/


RFC 5322 Implementation

(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])


## zwhitepaper.md

      
    Raw
  

              zwhitepaper.md
            
          
    UNLEASHING THE POWER OF ALL DOMAINS

THE SOCIAL, CULTURAL AND ECONOMIC BENEFITS OF UNIVERSAL ACCEPTANCE

https://uasg.tech/wp-content/uploads/2017/04/Unleashing-the-Power-of-All-Domains-White-Paper.pdf
Executive Summary

In order to increase innovation, competition, and consumer choice, the Internet Corporation for
Assigned Names and Numbers (ICANN) significantly increased the number of top-level domain names
in use and made them more international. These new domain names (referred to as gTLDs and IDNs)
are not always universally accepted by all software, limiting the benefits from their introduction.
Universal Acceptance (UA) refers to the process by which software is updated to accept all valid domain
names and email addresses.
The new domains (gTLDs, Generic Top Level Domains which allow any text string using Latin letters,
and IDNs, Internationalised Domain Names which allow domain names in most of the world’s
languages) have seen strong growth in registrations. Over 25 million have been registered to date, of
which over 2.6 million are IDNs, including both country-code and top-level IDNs. This popularity
demonstrates the value that Internet users place on the new domains, and also helps to drive the key
benefits of UA.
Significant progress has been made towards UA, although acceptance is not universal. Certain industry
bodies have performed testing and research to determine this. For instance, the company responsible
for the .club domain has performed testing of 600 popular online applications to determine their
acceptance of the .club domain. It found that an average of 65% of these applications accepted the
domain. In addition, ICANN commissioned research to determine the acceptance of the new domains
by web browsers. It found that the new gTLDs were accepted 96% of the time, whereas the IDNs were
accepted 80% of the time.
Introduction

In order to increase innovation, competition, and consumer choice, the Internet Corporation for
Assigned Names and Numbers (ICANN) increased the number of top-level domain names in use and
made them more international. The number of domains in use has grown from 22 in 2012, to
over 1200 in 2016. These new domain names are not always universally accepted by all software,
limiting the benefits from their introduction. Universal Acceptance (UA) refers to the process by which
software and applications are updated to accept the new domain names as valid.
All organisations should make their software UA ready.
Here we introduce the new gTLDs and IDNs. We describe what they are, how they are structured and
why they are used. We also describe the set of entities involved in managing them, registering them,
and selling them to the public. Finally, we introduce UA and discuss implementation issues.
Introduction to the new gTLDs and IDNs

ICANN has released a new set of gTLDs with registrations. They are intended to
address limitations in the traditional set of top-level domain names, which originally ended in three
letters (e.g. .com, .net, .gov, .org) or two character country codes. In addition, traditional top-level
domains were expressed in ASCII characters. These limitations restricted the identities available to
Internet users and website owners, limiting brand and name recognition.
A brief history of gTLDs

There were originally seven gTLDs: .com, .org, .gov, .edu, .mil, .net and .int.
In the early 2000s, ICANN introduced a number of new ones. These included the first ones with
more than three characters (such as .museum and .info) and also the first sponsored domains,
where the registry controlling the domain ensured that the organisations using them met certain
criteria, such as .aero, sponsored by SITA, being reserved for airlines, airports and other parts of
the air travel industry.
More recently, ICANN implemented the latest significant expansion, with over 1200 new gTLDs
authorised, including about 100 top-level IDNs (a number of country-code IDNs have also been
registered).
The new gTLDs include domains that end in three or more characters. This allows website owners to
create online identities that better match their requirements. Examples include generic domains such as
.photography and .blog; industry domains such as .bank and .insurance; geographical domains based on
cities or regions, such as .london, .berlin or .bayern; and brand domains, such as .bmw, .google and
.apple.
In addition, IDNs seek to address the limitations of traditional domain names for Internet users who
speak a language with a writing system other than the Latin alphabet. Instead of restricting domain
names to ASCII characters, IDNs allow Unicode characters to be used. Unicode allows the encoding,
representing and handling of characters in many more languages.
This means that IDNs allow domains to be expressed in most of the world’s written languages, and not
just English. Examples include French (.musée), German or Spanish, Russian (.ОНЛАЙН), and
Chinese (.网络, and .信息). In contrast to country-code TLDs (ccTLDs) that use ASCII characters,
country-code IDNs (e.g. .ОНЛАЙН.рф) require UA. This is because their use of Unicode characters in
the fields before the country code means that they may not necessarily be accepted by existing software
and applications.
Used domains

Domain names have been registered in over 1200 gTLDs. These include generally useful text strings
such as .online, .photography or .club, as well as brand-specific domains (.sony, .audi, .barclays), and
geographical domains (.bayern, .london, .paris). These registrations are concentrated in the top-ten
domains, which make up 64% of the total. A large proportion of the recent growth in registrations was
accounted for by the .xyz domain (40% of the registrations between Q4 2015 and Q2 2016).
Universal Acceptance

Based on our research, we believe that the key impediment to UA is awareness of the issue, rather than
the cost of implementation. In our case studies, below, anytime there has been an acceptance issue
flagged to an organisation, it has been addressed successfully and quickly.
Prior to the creation of the new gTLDs and IDNs, this validation process was simple. Domain names
and email addresses took a limited number of forms. The software simply needed to check against a
limited list of top-level domains (e.g. .com, .net, .org, .gov, .edu), and country codes (e.g. .au, .de, .uk,
.fr). In some ways, the problem with UA is similar to the Y2K issue, where software or application
developers coded in the last two digits of year, rather than all four. In this case, some simple domain
name acceptance rules were hard-coded, such as ensuring that the top-level domains were two or three
characters, or even checking against the limited list of gTLDs. Luckily, the issue does not appear to be
as deeply embedded as Y2K in the cases we examined.
In order to implement UA, this validation process must be extended to include all registered gTLDs and
IDNs.
The key factor that may hold back software and application owners from implementing UA is therefore
not technical, but to do with awareness of the problem and willingness to rectify it.
The likelihood of these fixes and developments being made depends in part on the nature of the
organisation.
Conclusion

It is our view that, for software and application owners implementing UA, the benefits of doing so will
outweigh the costs, and that the main impediment, from the interviews that we conducted, was
awareness of the issue, which was then typically quickly resolved.
The benefits of UA to software and application owners accrue from the wide usage of the new domains.
As we have seen, there have been almost 25 million domain names registered under the new domains.
There have also been over 2.6 million IDNs registered, both country-code and top-level (largely
concentrated in the Russian and Chinese languages). This provides a large, and rapidly growing, pool
of Internet users who will want to use their new domains to sign up for and interact with applications
and services. Organisations should ensure that they are able to capture the interest of all users identifying
themselves online with these new domains.
In addition, as more software and application owners implement UA, the attractiveness of the new
domains will increase. This will encourage further registrations, which will in turn generate a larger
customer pool for applications.
The costs of implementing UA do depend on the original design of the software in question. However,
the existence of a centralised, continuously updated database of all the new registered domains, together
with documentation provided by UASG, means that this task is rarely onerous. In our research, we did
not come across a situation where there had been serious technical difficulties in implementing UA.
In addition, we estimate that the benefits, in terms of spending by existing users of gTLDs and IDNs as
well as new users, are significant, and justify the cost of UA readiness. We have estimated that universal
acceptance of gTLDs could generate an additional annual benefit of USD 3.6 billion in new e-commerce revenues, and that universal acceptance of IDNs could bring 17 million more Internet users
online, with incremental annual spend online of USD 6.2 billion. These are conservative estimates in our view.
These factors combine to mean that it makes sense for application and software owners to implement
UA, so that the Internet continues to realise its full cultural, social and economic benefits.
	(email && /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email)) \|\| 'Invalid Email';

	// Sample addresses iterated by the loops further down in this script.
	// Each entry is a 3-element array:
	//   [0] 0 or 1 — presumably the expected validity (0 = invalid, 1 = valid); TODO confirm
	//   [1] the address string under test
	//   [2] a free-form note explaining the case (may be empty)
	// NOTE(review): inside these single-quoted literals a backslash in front of an ordinary
	// character (e.g. `\k`, `\a`, `\ `) is an identity escape, so the resulting string does not
	// contain that backslash. TODO confirm whether `\\` was intended for those samples.
	const emails = [
	[0, ' simple@example.com', 'a valid address with a leading space'],
	[0, '.email@test.com', 'a . is not allowed at the beginning and/or end'],
	[0, '1234567890123456789012345678901234567890123456789012345678901234+x@example.com', 'too long'],
	[0, '@@@', 'only one @ is allowed outside quotation marks'],
	[0, 'a"b(c)d,e:f;g<h>i[j\k]l@example.com', 'none of the special characters in this local-part are allowed outside quotation marks'],
	[0, 'A@b@c@example.com', 'only one @ is allowed outside quotation marks'],
	[0, 'Abc.example.com', 'no @ character'],
	[0, 'email@test.com.', 'a . is not allowed at the beginning and/or end'],
	[0, 'john..doe@example.com', 'double dot before @'],
	[0, 'john.doe@example..com', 'double dot after @'],
	[0, 'john@aol...com', 'not valid due to consecutive dots'],
	[0, 'just"not"right@example.com', 'quoted strings must be dot separated or the only element making up the local-part'],
	[0, 'simple@example.com ', 'a valid address with a trailing space'],
	[0, 'test', ''],
	[0, 'this is"not\allowed@example.com', 'spaces, quotes, and backslashes may only exist when within quoted strings and preceded by a backslash'],
	[0, 'this\ still\"not\\allowed@example.com', 'even if escaped (preceded by a backslash), spaces, quotes, and backslashes must still be contained by quotes'],
	[1, '" "@example.org', 'space between the quotes'],
	[1, '"()<>[]:,;@\\\"!#$%&\'-/=?^_`{}\| ~.a"@example.org', ''],
	[1, '"John..Doe"@example.com', 'dot . is allowed provided that it is not the first or last character unless quoted, and provided also that it does not appear consecutively unless quoted'],
	[1, '"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com', ''],
	[1, '#!$%&\'*+-/=?^_`{}\|~@example.org', ''],
	[1, '(comment)john.smith@example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
	[1, 'admin@mailserver1', 'local domain name with no TLD, although ICANN highly discourages dotless email addresses'],
	[1, 'disposable.style.email.with+symbol@example.com', ''],
	[1, 'example-indeed@strange-example.com', ''],
	[1, 'example@localhost', 'sent from localhost'],
	[1, 'example@s.solutions', 'https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains'],
	[1, 'fully-qualified-domain@example.com', ''],
	[1, 'joeuser+tag@example.com', 'subaddressing, plus addressing, or tagged addressing'],
	[1, 'john.smith(comment)@example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
	[1, 'john.smith@(comment)example.com', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
	[1, 'john.smith@example.com(comment)', 'comments are allowed in the domain as well as in the local-part, this is equivalent to john.smith@example.com'],
	[1, 'jsmith@[192.168.2.1]', 'the domain may be an ip address literal surrounded by square brackets []'],
	[1, 'jsmith@[IPv6:2001:db8::1]', 'the domain may be an ip address literal surrounded by square brackets []'],
	[1, 'other.email-with-dash@example.com', ''],
	[1, 'Pelé@example.com', 'latin alphabet with diacritics'],
	[1, 'simple@example.com', ''],
	[1, 'user.name+tag+sorting@example.com', 'will go to user.name@example.com inbox'],
	[1, 'user@[2001:DB8::1]', ''],
	[1, 'user@localserver', ''],
	[1, 'very.common@example.com', ''],
	[1, 'x@example.com', 'one-letter local-part'],
	[1, 'δοκιμή@παράδειγμα.δοκιμή', 'greek alphabet'],
	[1, 'θσερ@εχαμπλε.ψομ', ''],
	[1, 'чебурашка@ящик-с-апельсинами.рф', 'cyrillic characters'],
	[1, 'юзер@екзампл.ком', ''],
	[1, 'राम@+मोहन.ईन्फो', ''],
	[1, 'संपर्क@डाटामेल.भारत', 'devanagari characters'],
	[1, '二ノ宮@黒川.日本', 'japanese characters'],
	[1, '伊昭傑@郵件.商務', ''],
	[1, '我買@屋企.香港', 'traditional chinese characters'],
	];

	// Diagnostic dump: for every sample print the address followed by the outcome of the
	// U+0000–U+00FF charset check and the outcome of the loose "x@y.z" shape check.
	for (const sample of emails) {
		const address = sample[1];
		const charsetOk = /^[\u0000-\u00ff]+$/.test(address);
		const shapeOk = /^.+@.+\..+$/.test(address);
		console.log(address + ' => ' + charsetOk + ' ' + shapeOk);
	}

	// Print every sample that passes BOTH checks, together with the flag stored in its
	// first element, so accepted-but-flagged-0 samples are easy to spot.
	for (const [flag, address] of emails) {
		// Skip anything containing a character outside U+0000–U+00FF.
		if (!/^[\u0000-\u00ff]+$/.test(address)) continue;
		// Skip anything that is not at least "x@y.z".
		if (!/^.+@.+\..+$/.test(address)) continue;
		console.log(`${address} => ${flag}`);
	}

	// simple@example.com => 0
	// .email@test.com => 0
	// 1234567890123456789012345678901234567890123456789012345678901234+x@example.com => 0
	// a"b(c)d,e:f;g<h>i[jk]l@example.com => 0
	// A@b@c@example.com => 0
	// email@test.com. => 0
	// john..doe@example.com => 0
	// john.doe@example..com => 0
	// john@aol...com => 0
	// just"not"right@example.com => 0
	// simple@example.com => 0
	// this is"notallowed@example.com => 0
	// this still"not\allowed@example.com => 0
	// " "@example.org => 1
	// "()<>[]:,;@\"!#$%&'-/=?^_`{}\| ~.a"@example.org => 1
	// "John..Doe"@example.com => 1
	// "very.(),:;<>[]".VERY."very@\ "very".unusual"@strange.example.com => 1
	// #!$%&'*+-/=?^_`{}\|~@example.org => 1
	// (comment)john.smith@example.com => 1
	// disposable.style.email.with+symbol@example.com => 1
	// example-indeed@strange-example.com => 1
	// example@s.solutions => 1
	// fully-qualified-domain@example.com => 1
	// joeuser+tag@example.com => 1
	// john.smith(comment)@example.com => 1
	// john.smith@(comment)example.com => 1
	// john.smith@example.com(comment) => 1
	// jsmith@[192.168.2.1] => 1
	// other.email-with-dash@example.com => 1
	// Pelé@example.com => 1
	// simple@example.com => 1
	// user.name+tag+sorting@example.com => 1
	// very.common@example.com => 1
	// x@example.com => 1

	// Print every sample that FAILS at least one of the two checks (charset limited to
	// U+0000–U+00FF, loose "x@y.z" shape), together with the flag stored in its first element.
	for (const email of emails) {
		if (!/^[\u0000-\u00ff]+$/.test(email[1]) || !/^.+@.+\..+$/.test(email[1])) {
			console.log(`${email[1]} => ${email[0]}`);
		}
	}

	// @@@ => 0
	// Abc.example.com => 0
	// test => 0
	// admin@mailserver1 => 1
	// example@localhost => 1
	// jsmith@[IPv6:2001:db8::1] => 1
	// user@[2001:DB8::1] => 1
	// user@localserver => 1
	// δοκιμή@παράδειγμα.δοκιμή => 1
	// θσερ@εχαμπλε.ψομ => 1
	// чебурашка@ящик-с-апельсинами.рф => 1
	// юзер@екзампл.ком => 1
	// राम@+मोहन.ईन्फो => 1
	// संपर्क@डाटामेल.भारत => 1
	// 二ノ宮@黒川.日本 => 1
	// 伊昭傑@郵件.商務 => 1
	// 我買@屋企.香港 => 1
	-- Audit query: return the rows of `email` whose stored `valid` flag disagrees with the
	-- regex check (flagged valid but not matching, or flagged invalid but matching).
	-- The literal dot is written as [.] rather than \. because a backslash inside a string
	-- literal is consumed as a string escape by default in MySQL, which would leave a bare `.`
	-- that matches any character.
	SELECT * FROM email WHERE (
	(emailaddress NOT REGEXP '^[^[:space:]@]+@[^[:space:]@]+[.][^[:space:]@]+$' AND valid = True)
	OR
	(emailaddress REGEXP '^[^[:space:]@]+@[^[:space:]@]+[.][^[:space:]@]+$' AND valid = False)
	);
	/**
	 * Loose email address check: the address must consist solely of characters in the
	 * range U+0000–U+00FF and must look like `something@something.something`.
	 *
	 * Notes:
	 * - The character set is limited to match the database column encoding / collation
	 *   (cp1252 / latin1).
	 * - Dotless domains are rejected (allowed by the RFC but prohibited by ICANN); this also
	 *   rejects bracketed IPv6 literals, which contain no dot.
	 * - Quoted strings are discouraged in RFC 5321 but not prohibited, so they are accepted.
	 * - Consecutive dots before the @ are invalid per the RFC but valid and ignored by Gmail;
	 *   they are accepted here.
	 *
	 * TODO: additional checks that are not implemented yet:
	 * - must not have whitespace outside of quotes
	 * - must not begin or end with a period
	 * - must not have consecutive periods outside of quotes
	 * - limit the whole address to 254 characters
	 * - limit the local part (before @) to 64 characters
	 * - limit each part of the domain name to 63 characters
	 *
	 * @param email - The address to check.
	 * @returns `true` when both the charset and the shape check pass, otherwise `false`.
	 */
	export const isValidEmailAddress = (email: string): boolean => {
		// Charset gate: every character must be within U+0000–U+00FF (also rejects '').
		if (!/^[\u0000-\u00ff]+$/.test(email)) {
			return false;
		}
		// Shape gate: at least one character, an @, at least one character, a dot, and more.
		return /^.+@.+\..+$/.test(email);
	};
id	valid	emailaddress	notes
2	false	.email@test.com	a . is not allowed at the beginning and/or end
3	false	1234567890123456789012345678901234567890123456789012345678901234+x@example.com	too long
5	false	a"b(c)d,e:f;g<h>i[jk]l@example.com	none of the special characters in this local-part are allowed outside quotation marks
8	false	email@test.com.	a . is not allowed at the beginning and/or end
9	false	john..doe@example.com	double dot before @
10	false	john.doe@example..com	double dot after @
11	false	john@aol...com	not valid due to consecutive dots
12	false	just"not"right@example.com	quoted strings must be dot separated or the only element making up the local-part
16	true	"()<>[]:,;@"!#$%&'-/=?^_`{}| ~.a"@example.org
18	true	"very.(),:;<>[]".VERY."very@\ "very".unusual"@strange.example.com