Skip to content

Instantly share code, notes, and snippets.

@htkcodes
Created March 23, 2021 19:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save htkcodes/b56142cbf179722bbc6542cb91b45d49 to your computer and use it in GitHub Desktop.
Save htkcodes/b56142cbf179722bbc6542cb91b45d49 to your computer and use it in GitHub Desktop.
Scrapes emails from a file
# I forked this from somewhere but forgot where; it had a regex bug, which I fixed.
import re
# Path of the input text file that is scanned for email addresses.
fileToRead = 'emails.txt'
# Path of the output file; writeFile() opens it for writing.
fileToWrite = 'emailExtracted.txt'
# Separator strings that are replaced with a space before a line is split into words.
delimiterInFile = [',', ';']
def validateEmail(strEmail):
    """Return True if ``strEmail`` loosely looks like an email address.

    The check only requires the shape ``<text>@<text>.<text>`` with at least
    one character in each position. It is a coarse filter, not a full
    address validator.

    Args:
        strEmail: The candidate token to test.

    Returns:
        True when the token matches the pattern, otherwise False.
    """
    # Raw string so that ``\.`` reaches the regex engine as an escaped dot
    # instead of being an invalid string escape. ``.+`` (rather than ``.*``)
    # keeps degenerate tokens such as "@." or "user@." from being accepted.
    return re.match(r"(.+)@(.+)\.(.+)", strEmail) is not None
def writeFile(listData):
    """Write every item of ``listData`` to ``fileToWrite``, one per line.

    Any existing content of the output file is replaced.

    Args:
        listData: Iterable of strings; each one is written followed by a
            newline.
    """
    # The context manager guarantees the file is flushed and closed, even
    # if a write fails part-way through.
    with open(fileToWrite, 'w') as file:
        # writelines() avoids building one large string by repeated
        # concatenation.
        file.writelines(item + '\n' for item in listData)
# Collect every whitespace-separated token that looks like an email address.
listEmail = []
# The context manager closes the input file once reading is finished.
with open(fileToRead, 'r') as file:
    # Iterate the file lazily instead of loading every line into memory.
    for itemLine in file:
        item = itemLine
        # Turn each configured delimiter into a space so that a single
        # split() call separates the tokens.
        for delimiter in delimiterInFile:
            item = item.replace(str(delimiter), ' ')
        listEmail.extend(word for word in item.split() if validateEmail(word))

if listEmail:
    # Drop duplicates; the addresses are sorted on write so the output
    # file is identical from run to run.
    uniqEmail = set(listEmail)
    print(len(uniqEmail), "emails collected!")
    writeFile(sorted(uniqEmail))
else:
    print("No email found.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment