aballant01/email_validation.py Secret

## email_validation.py
#!/usr/bin/python

import re

def is_valid_email_address(email):
    """Return True if ``email`` passes this module's email address checks.

    The address is split at its last "@" into a local part and a domain.
    The local part is scanned character by character with a two-state
    machine ("local" for unquoted text, "escape" for text inside double
    quotes); the domain is then handed to is_email_domain_valid().

    Args:
        email: The address to check, as a string.

    Returns:
        True if both the local part and the domain are accepted, otherwise
        False.

    Things missing:
    - ipv6 Domains - while it does support properly formatted ipv4 domains, i've
      yet to add in ipv6 domains
    - Does not deal with or check unicode properly - it'll treat all emails as ASCII
      strings
    - It doesn't check that the domain name is actually a proper domain name - e.g.
      passing in the domain "example.web", which wouldn't actually be allowed, will
      pass the domain check
    - A quoted section followed by unquoted text (e.g. "john".smith) is
      rejected, because the scanner re-enters the quoted state after every
      closing quote and the address must not end inside a quoted section
    """
    if email.strip() == '':
        return False

    # Only the last "@" separates the local part from the domain; any earlier
    # "@" belongs to the local part (where it is only legal inside quotes).
    # Splitting this way avoids substituting a placeholder token into the
    # address, which would corrupt input that already contains that token.
    local, separator, domain = email.rpartition('@')

    # No "@" at all, or nothing in front of it: there is no local part.
    if not separator or not local:
        return False

    # The local part may not begin or end with a period.
    if local[0] == '.' or local[-1] == '.':
        return False

    # To properly account for the initial state of the validator, check for
    # an escape sequence initiation at the start of the local part
    state = 'escape' if local[0] == '"' else 'local'

    skip = False
    for i, c in enumerate(local):
        # If this character has been escaped by a preceding backslash (as
        # determined below) then just move on to the next character
        if skip:
            skip = False
            continue

        previous = local[i - 1] if i > 0 else ''

        if state == 'local':
            # A quote immediately before this character (re)opens a quoted
            # section
            if previous == '"':
                state = 'escape'
            # checks that no disallowed characters appear
            elif re.match(r'[\s"(),:;<>@\[\]]', c):
                return False
            # Ensures there are no double periods in a non-escape sequence
            elif local[i:i + 2] == '..':
                return False
        elif state == 'escape':
            # Checks for an escape sequence terminator: an unescaped quote at
            # this position, or a quote directly followed by a period. The
            # opening quote itself (i == 0) can never be a terminator.
            if i > 0 and re.match(r'[^\\]?"(?:\.|$)', previous + c):
                state = 'local'
            # A backslash escapes whatever character follows it
            elif c == '\\':
                skip = True

    # Finishing in the "escape" state means a quoted section was opened but
    # never closed, so the local part is malformed.
    if state != 'local':
        return False

    # The local component is valid, so the result now depends only on
    # whether the domain component is valid
    return is_email_domain_valid(domain)

def is_email_domain_valid(domain):
    """Check whether ``domain`` (the part of an address after "@") is acceptable.

    Two forms are accepted:
    - a dotted-quad IPv4 address, optionally wrapped in a matching pair of
      square brackets, whose four octets are each in the range 0-255
    - a host name that starts with a word character, contains at least one
      period, has no empty labels, and does not start or end with "-", "_"
      or "."

    Args:
        domain: The domain portion of an email address, as a string.

    Returns:
        True if the domain is accepted, otherwise False.
    """
    # If the domain only has numbers, periods, and brackets, we'll treat it
    # as an IP address. \Z (rather than $) is used so that a trailing newline
    # is not silently ignored.
    if re.match(r"^[\d.\[\]]+\Z", domain):
        ip_match = re.match(
            r"^(\[?)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})(\]?)\Z", domain)
        if ip_match is None:
            return False

        groups = ip_match.groups()
        opening_bracket, closing_bracket = groups[0], groups[-1]
        # Brackets are optional, but they must come as a pair
        if bool(opening_bracket) != bool(closing_bracket):
            return False

        # Three digits can spell values up to 999; an IPv4 octet stops at 255
        return all(int(octet) <= 255 for octet in groups[1:-1])

    # The domain must start with a word character to be valid, so we can
    # make that check very easily
    if not re.match(r"\w", domain):
        return False

    # Matches an invalid character sequence in a domain name
    # ["@\s()]  - Domain cannot have these characters in it
    # \.\.      - Cannot have two periods in sequence
    # ^[^.]+$   - Must have at least one period in it
    # ^[-_.]    - Cannot start with one of these characters
    # [-_.]$    - Cannot end with one of these characters
    invalid_character_sequence = re.compile(
        r'(["@\s()]|\.\.|^[^.]+$|^[-_.]|[-_.]$)')
    if invalid_character_sequence.search(domain):
        return False
    return True

## email_validation_test.py
#!/usr/bin/python

import email_validation
import unittest

class EmailTest(unittest.TestCase):
    """Unit tests for the public functions of the email_validation module."""

    def testValidDomainsAreValid(self):
        # Ordinary host names and a correctly bracketed IPv4 literal
        valid_domains = (
            "gmail.com",
            "yahoo.com",
            "example.co.uk",
            "example-one.com",
            "[123.123.123.123]",
        )
        for domain in valid_domains:
            self.assertTrue(
                email_validation.is_email_domain_valid(domain), msg=domain)

    def testInvalidDomainsAreInvalid(self):
        # Unbalanced brackets and an over-long final octet
        invalid_domains = (
            "123.123.123.123]",
            "[123.123.123.123",
            "123.123.123.1233",
        )
        for domain in invalid_domains:
            self.assertFalse(
                email_validation.is_email_domain_valid(domain), msg=domain)

    def testValidSimpleEmailAddresses(self):
        valid_addresses = (
            'niceandsimple@example.com',
            'very.common@example.com',
            'a.little.lengthy.but.fine@dept.example.com',
            'disposable.style.email_validation.with+symbol@example.com',
            'other.email-with-dash@example.com',
        )
        for address in valid_addresses:
            self.assertTrue(
                email_validation.is_valid_email_address(address), msg=address)

    def testValidComplexEmailAddresses(self):
        # NOTE: these are ordinary (non-raw) single-quoted literals, so \"
        # yields a bare double quote and \\ yields a single backslash.
        valid_addresses = (
            '"much.more unusual"@example.com',
            '"very.unusual.@.unusual.com"@example.com',
            '"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com',
            '"()<>:,;@\\"!#$%&\'*+-/=?_`{}| ~.a"@example.org',
        )
        for address in valid_addresses:
            self.assertTrue(
                email_validation.is_valid_email_address(address), msg=address)

    def testInvalidEmailAddresses(self):
        # Backslashes are doubled so that each literal really contains a
        # backslash (a bare \a would be the BEL control character, and \k or
        # "\ " are invalid escape sequences).
        invalid_addresses = (
            'Abc.example.com',
            'A@b@c@example.com',
            'a"b(c)d,e:f;g<h>i[j\\k]l@example.com',
            'just"not"right@example.com',
            'this is"not\\allowed@example.com',
            'this\\ still\"not\\allowed@example.com ',
            'email@example',
            'email@-example.com',
            'email@111.222.333.44444',
            'email@example..com',
            'Abc..123@example.com',
        )
        for address in invalid_addresses:
            self.assertFalse(
                email_validation.is_valid_email_address(address), msg=address)

# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
	#!/usr/bin/python

	import re

	def is_valid_email_address(email):
	    """Return True if ``email`` passes this module's email address checks.

	    The address is split at its last "@" into a local part and a domain.
	    The local part is scanned character by character with a two-state
	    machine ("local" for unquoted text, "escape" for text inside double
	    quotes); the domain is then handed to is_email_domain_valid().

	    Args:
	        email: The address to check, as a string.

	    Returns:
	        True if both the local part and the domain are accepted, otherwise
	        False.

	    Things missing:
	    - ipv6 Domains - while it does support properly formatted ipv4 domains, i've
	      yet to add in ipv6 domains
	    - Does not deal with or check unicode properly - it'll treat all emails as ASCII
	      strings
	    - It doesn't check that the domain name is actually a proper domain name - e.g.
	      passing in the domain "example.web", which wouldn't actually be allowed, will
	      pass the domain check
	    - A quoted section followed by unquoted text (e.g. "john".smith) is
	      rejected, because the scanner re-enters the quoted state after every
	      closing quote and the address must not end inside a quoted section
	    """
	    if email.strip() == '':
	        return False

	    # Only the last "@" separates the local part from the domain; any earlier
	    # "@" belongs to the local part (where it is only legal inside quotes).
	    # Splitting this way avoids substituting a placeholder token into the
	    # address, which would corrupt input that already contains that token.
	    local, separator, domain = email.rpartition('@')

	    # No "@" at all, or nothing in front of it: there is no local part.
	    if not separator or not local:
	        return False

	    # The local part may not begin or end with a period.
	    if local[0] == '.' or local[-1] == '.':
	        return False

	    # To properly account for the initial state of the validator, check for
	    # an escape sequence initiation at the start of the local part
	    state = 'escape' if local[0] == '"' else 'local'

	    skip = False
	    for i, c in enumerate(local):
	        # If this character has been escaped by a preceding backslash (as
	        # determined below) then just move on to the next character
	        if skip:
	            skip = False
	            continue

	        previous = local[i - 1] if i > 0 else ''

	        if state == 'local':
	            # A quote immediately before this character (re)opens a quoted
	            # section
	            if previous == '"':
	                state = 'escape'
	            # checks that no disallowed characters appear
	            elif re.match(r'[\s"(),:;<>@\[\]]', c):
	                return False
	            # Ensures there are no double periods in a non-escape sequence
	            elif local[i:i + 2] == '..':
	                return False
	        elif state == 'escape':
	            # Checks for an escape sequence terminator: an unescaped quote at
	            # this position, or a quote directly followed by a period. The
	            # opening quote itself (i == 0) can never be a terminator.
	            if i > 0 and re.match(r'[^\\]?"(?:\.|$)', previous + c):
	                state = 'local'
	            # A backslash escapes whatever character follows it
	            elif c == '\\':
	                skip = True

	    # Finishing in the "escape" state means a quoted section was opened but
	    # never closed, so the local part is malformed.
	    if state != 'local':
	        return False

	    # The local component is valid, so the result now depends only on
	    # whether the domain component is valid
	    return is_email_domain_valid(domain)

	def is_email_domain_valid(domain):
	    """Check whether ``domain`` (the part of an address after "@") is acceptable.

	    Two forms are accepted:
	    - a dotted-quad IPv4 address, optionally wrapped in a matching pair of
	      square brackets, whose four octets are each in the range 0-255
	    - a host name that starts with a word character, contains at least one
	      period, has no empty labels, and does not start or end with "-", "_"
	      or "."

	    Args:
	        domain: The domain portion of an email address, as a string.

	    Returns:
	        True if the domain is accepted, otherwise False.
	    """
	    # If the domain only has numbers, periods, and brackets, we'll treat it
	    # as an IP address. \Z (rather than $) is used so that a trailing newline
	    # is not silently ignored.
	    if re.match(r"^[\d.\[\]]+\Z", domain):
	        ip_match = re.match(
	            r"^(\[?)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})(\]?)\Z", domain)
	        if ip_match is None:
	            return False

	        groups = ip_match.groups()
	        opening_bracket, closing_bracket = groups[0], groups[-1]
	        # Brackets are optional, but they must come as a pair
	        if bool(opening_bracket) != bool(closing_bracket):
	            return False

	        # Three digits can spell values up to 999; an IPv4 octet stops at 255
	        return all(int(octet) <= 255 for octet in groups[1:-1])

	    # The domain must start with a word character to be valid, so we can
	    # make that check very easily
	    if not re.match(r"\w", domain):
	        return False

	    # Matches an invalid character sequence in a domain name
	    # ["@\s()]  - Domain cannot have these characters in it
	    # \.\.      - Cannot have two periods in sequence
	    # ^[^.]+$   - Must have at least one period in it
	    # ^[-_.]    - Cannot start with one of these characters
	    # [-_.]$    - Cannot end with one of these characters
	    invalid_character_sequence = re.compile(
	        r'(["@\s()]|\.\.|^[^.]+$|^[-_.]|[-_.]$)')
	    if invalid_character_sequence.search(domain):
	        return False
	    return True
	#!/usr/bin/python

	import email_validation
	import unittest

	class EmailTest(unittest.TestCase):
	    """Unit tests for the public functions of the email_validation module."""

	    def testValidDomainsAreValid(self):
	        # Ordinary host names and a correctly bracketed IPv4 literal
	        valid_domains = (
	            "gmail.com",
	            "yahoo.com",
	            "example.co.uk",
	            "example-one.com",
	            "[123.123.123.123]",
	        )
	        for domain in valid_domains:
	            self.assertTrue(
	                email_validation.is_email_domain_valid(domain), msg=domain)

	    def testInvalidDomainsAreInvalid(self):
	        # Unbalanced brackets and an over-long final octet
	        invalid_domains = (
	            "123.123.123.123]",
	            "[123.123.123.123",
	            "123.123.123.1233",
	        )
	        for domain in invalid_domains:
	            self.assertFalse(
	                email_validation.is_email_domain_valid(domain), msg=domain)

	    def testValidSimpleEmailAddresses(self):
	        valid_addresses = (
	            'niceandsimple@example.com',
	            'very.common@example.com',
	            'a.little.lengthy.but.fine@dept.example.com',
	            'disposable.style.email_validation.with+symbol@example.com',
	            'other.email-with-dash@example.com',
	        )
	        for address in valid_addresses:
	            self.assertTrue(
	                email_validation.is_valid_email_address(address), msg=address)

	    def testValidComplexEmailAddresses(self):
	        # NOTE: these are ordinary (non-raw) single-quoted literals, so \"
	        # yields a bare double quote and \\ yields a single backslash.
	        valid_addresses = (
	            '"much.more unusual"@example.com',
	            '"very.unusual.@.unusual.com"@example.com',
	            '"very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual"@strange.example.com',
	            '"()<>:,;@\\"!#$%&\'*+-/=?_`{}| ~.a"@example.org',
	        )
	        for address in valid_addresses:
	            self.assertTrue(
	                email_validation.is_valid_email_address(address), msg=address)

	    def testInvalidEmailAddresses(self):
	        # Backslashes are doubled so that each literal really contains a
	        # backslash (a bare \a would be the BEL control character, and \k or
	        # "\ " are invalid escape sequences).
	        invalid_addresses = (
	            'Abc.example.com',
	            'A@b@c@example.com',
	            'a"b(c)d,e:f;g<h>i[j\\k]l@example.com',
	            'just"not"right@example.com',
	            'this is"not\\allowed@example.com',
	            'this\\ still\"not\\allowed@example.com ',
	            'email@example',
	            'email@-example.com',
	            'email@111.222.333.44444',
	            'email@example..com',
	            'Abc..123@example.com',
	        )
	        for address in invalid_addresses:
	            self.assertFalse(
	                email_validation.is_valid_email_address(address), msg=address)


	# Run the test suite when this file is executed directly as a script.
	if __name__ == '__main__':
	    unittest.main()