Skip to content

Instantly share code, notes, and snippets.

@hunt3r
Created February 20, 2012 15:03
Show Gist options
  • Save hunt3r/1869585 to your computer and use it in GitHub Desktop.
Save hunt3r/1869585 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# By Chris Hunter
import random
import re
##
# getRandomSamplingOfEmails
# Returns a deduplicated random set of n length from a given m set of emails
# Uses regex to test for email address
# Safe for n less than 0 or greater than the length of mSet
# {list} mSet
# {int} n
def getRandomSamplingOfEmails(mSet, n):
    """Return up to ``n`` distinct, randomly chosen valid emails from ``mSet``.

    Entries are kept only if they are strings matching the email pattern
    below. Duplicate addresses are collapsed so that each address can
    appear at most once in the result. The caller's ``mSet`` is never
    modified.

    Args:
        mSet: Iterable of candidate values (typically a list of strings).
            Non-string entries are ignored.
        n: Maximum number of addresses to return. Values less than or
            equal to zero yield an empty list.

    Returns:
        A new list, in random order, holding ``min(n, k)`` addresses,
        where ``k`` is the number of distinct valid addresses in ``mSet``.
    """
    if n <= 0:
        return []

    # Same pattern as before, written as a raw string: "<anything>@<host>"
    # where the host ends in a 2-3 letter or 1-3 digit group and may be
    # wrapped in square brackets.
    emailPattern = re.compile(
        r"^.+\@(\[?)[a-zA-Z0-9\-\.]+\.([a-zA-Z]{2,3}|[0-9]{1,3})(\]?)$"
    )

    # dict.fromkeys() removes duplicates while keeping first-seen order, so
    # the outcome is reproducible for a given random seed (a plain set would
    # make the sampling order depend on string hashing).
    candidates = list(dict.fromkeys(
        item for item in mSet
        if isinstance(item, str) and emailPattern.match(item)
    ))

    # Clamp the sample size: random.sample() raises ValueError when asked
    # for more items than are available.
    return random.sample(candidates, min(n, len(candidates)))
# run it.
def main():
    """Demo entry point: print a random sample of three valid addresses."""
    # Fixed demo input mixing well-formed addresses with two invalid entries.
    candidateEmails = [
        "a@a.com", "b@a.com", "c@a.com",
        "notAnEmail", "garbage",
        "a@b.com", "b@b.com", "c@b.com",
    ]
    sampleSize = 3
    sample = getRandomSamplingOfEmails(candidateEmails, sampleSize)
    print(sample)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment