Created
May 30, 2022 08:47
-
-
Save dmoruzzi/00b396c1b7f31636ab4b5d0808ccb1b2 to your computer and use it in GitHub Desktop.
Simple script to extract email addresses from a text file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script will extract email addresses from a text file
# and print them to the screen; please repurpose this script
# as you see fit.
import re # Import regular expression module | |
def open_text_file(file_name):
    """Open a text file for reading and return its file handle.

    Args:
        file_name: Path of the file to open.

    Returns:
        The file handle opened in text read mode, or None when the file
        could not be opened. In the failure case a message describing the
        problem is printed. The caller is responsible for closing the
        returned handle.
    """
    try:
        return open(file_name, 'r')
    except FileNotFoundError:  # The file does not exist
        print(f"Error: {file_name} not found")
    except PermissionError:  # Access denied, or the file is locked elsewhere
        print("Error: Permission denied, please check file permissions and ensure the file is not open")
    except OSError as err:  # Any other OS-level failure (e.g. path is a directory)
        # Bind the exception so the real cause is reported instead of the
        # literal text "{OSError}".
        print(f"Error: OS Error {err}")
    except Exception as err:
        print(f"Unexpected error: {err}")
    # Every handled failure falls through to a single None result.
    return None
def filter_email_addresses(file_handle):
    """Collect every email-like token found in the lines of a file handle.

    Args:
        file_handle: An iterable yielding lines of text (such as an open
            text file), or None.

    Returns:
        A list of the matched strings in order of appearance; duplicates
        are kept. An empty list is returned when ``file_handle`` is None.
    """
    # open_text_file returns None when the file could not be opened;
    # treat that as "nothing to scan" rather than failing on iteration.
    if file_handle is None:
        return []
    email_regex = re.compile(r'[\w\.-]+@[\w\.-]+')
    email_list = []
    for line in file_handle:
        # The pattern has no capture groups, so findall yields whole matches.
        email_list.extend(email_regex.findall(line))
    return email_list
def prompt_for_file_name():
    """Ask the user for a file name until one ending in ``.txt`` is given.

    Returns:
        The first entered string that ends with ``.txt``.
    """
    prompt = "Enter a file name ending with .txt to extract email addresses: "
    while True:
        candidate = str(input(prompt))
        if candidate.endswith('.txt'):
            return candidate
        # Every retry after the first attempt uses the shorter reminder.
        prompt = "Enter a file name ending with .txt: "
def print_email_addresses(email_list):
    """Write each entry of ``email_list`` to standard output, one per line."""
    for address in email_list:
        print(address)
def main():
    """Prompt for a .txt file, extract its email addresses and print them.

    Prints "No email addresses found" when the file contains no matches.
    If the file cannot be opened, open_text_file has already printed the
    reason and this function returns without scanning anything.
    """
    file_name = prompt_for_file_name()
    file_handle = open_text_file(file_name)
    if file_handle is None:
        # Nothing to scan; avoid passing None on to the filter step.
        return
    # The with-block closes the handle even if filtering raises.
    with file_handle:
        email_list = filter_email_addresses(file_handle)
    if email_list:
        print_email_addresses(email_list)
    else:
        print("No email addresses found")


# Run the extractor only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment