Created
January 4, 2024 20:21
-
-
Save ljuti/75d76b8679efcd8149b422cd266ccf20 to your computer and use it in GitHub Desktop.
MX Record Resolver for Email Provider
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## MX Record Resolver for Email Provider | |
# | |
# This script takes a CSV file with a column of email addresses and adds a column | |
# with the email provider (e.g. Google, Microsoft, etc.) based on the MX record | |
# of the email address domain. | |
# | |
# The MX record is the DNS record that specifies the mail server responsible for | |
# accepting email messages on behalf of a domain name. The MX record is used to | |
# route email messages to the correct mail server. | |
# | |
# We can use the MX record to guess the email provider of an email address. | |
# | |
# The script uses the dnspython library to resolve the MX record of the domain. | |
# The script will prompt the user for the CSV file name, the name of the email | |
# column, and the desired output file name. The script will then save the | |
# processed data to a new CSV file. | |
# | |
# Requirements: | |
# - Python 3 | |
# - dnspython library (pip install dnspython) | |
# - pandas library (pip install pandas) | |
# | |
# Usage: python mx_resolver.py | |
# | |
# Author: Lauri Jutila (@ljuti) | |
# URL: https://neuraljets.com | |
# License: MIT | |
import os | |
import pandas as pd | |
import dns.resolver | |
def get_email_provider(mx_record): | |
if 'google' in mx_record or 'googlemail' in mx_record: | |
return 'Google (Gmail)' | |
elif 'outlook.com' in mx_record or 'office365.com' in mx_record or 'mail.protection.outlook.com' in mx_record: | |
return 'Microsoft (Outlook/O365)' | |
else: | |
return 'Other' | |
def file_exists(filename): | |
return os.path.isfile(filename) | |
def get_valid_filename(prompt): | |
while True: | |
filename = input(prompt) | |
if file_exists(filename): | |
return filename | |
else: | |
print("File not found. Please try again.") | |
def get_valid_column(df, prompt): | |
while True: | |
column_name = input(prompt) | |
if column_name in df.columns: | |
return column_name | |
else: | |
print("Column not found. Please try again.") | |
def process_csv(input_file, email_column, output_file): | |
# Read CSV | |
df = pd.read_csv(input_file) | |
# Extract domain and find email provider | |
df['domain'] = df[email_column].apply(lambda x: x.split('@')[-1]) | |
df['email_provider'] = '' | |
for index, row in df.iterrows(): | |
try: | |
mx_records = dns.resolver.resolve(row['domain'], 'MX') | |
mx_record = str(mx_records[0].exchange).lower() | |
df.at[index, 'email_provider'] = get_email_provider(mx_record) | |
except Exception as e: | |
df.at[index, 'email_provider'] = 'Unknown' | |
# Save to new CSV | |
df.to_csv(output_file, index=False) | |
print(f"Processed data saved to {output_file}") | |
# User input for file names and column name | |
input_file = get_valid_filename("Enter the filename of the CSV: ") | |
df = pd.read_csv(input_file) | |
email_column = get_valid_column(df, "Enter the name of the email column: ") | |
output_file = input("Enter the desired output file name: ") | |
# Process the CSV | |
process_csv(input_file, email_column, output_file) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment