Skip to content

Instantly share code, notes, and snippets.

@ljuti
Created January 4, 2024 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ljuti/75d76b8679efcd8149b422cd266ccf20 to your computer and use it in GitHub Desktop.
Save ljuti/75d76b8679efcd8149b422cd266ccf20 to your computer and use it in GitHub Desktop.
MX Record Resolver for Email Provider
## MX Record Resolver for Email Provider
#
# This script takes a CSV file with a column of email addresses and adds a column
# with the email provider (e.g. Google, Microsoft, etc.) based on the MX record
# of the email address domain.
#
# The MX record is the DNS record that specifies the mail server responsible for
# accepting email messages on behalf of a domain name. The MX record is used to
# route email messages to the correct mail server.
#
# We can use the MX record to guess the email provider of an email address.
#
# The script uses the dnspython library to resolve the MX record of the domain.
# The script will prompt the user for the CSV file name, the name of the email
# column, and the desired output file name. The script will then save the
# processed data to a new CSV file.
#
# Requirements:
# - Python 3
# - dnspython library (pip install dnspython)
# - pandas library (pip install pandas)
#
# Usage: python mx_resolver.py
#
# Author: Lauri Jutila (@ljuti)
# URL: https://neuraljets.com
# License: MIT
import os
import pandas as pd
import dns.resolver
def get_email_provider(mx_record):
if 'google' in mx_record or 'googlemail' in mx_record:
return 'Google (Gmail)'
elif 'outlook.com' in mx_record or 'office365.com' in mx_record or 'mail.protection.outlook.com' in mx_record:
return 'Microsoft (Outlook/O365)'
else:
return 'Other'
def file_exists(filename):
return os.path.isfile(filename)
def get_valid_filename(prompt):
while True:
filename = input(prompt)
if file_exists(filename):
return filename
else:
print("File not found. Please try again.")
def get_valid_column(df, prompt):
while True:
column_name = input(prompt)
if column_name in df.columns:
return column_name
else:
print("Column not found. Please try again.")
def process_csv(input_file, email_column, output_file):
# Read CSV
df = pd.read_csv(input_file)
# Extract domain and find email provider
df['domain'] = df[email_column].apply(lambda x: x.split('@')[-1])
df['email_provider'] = ''
for index, row in df.iterrows():
try:
mx_records = dns.resolver.resolve(row['domain'], 'MX')
mx_record = str(mx_records[0].exchange).lower()
df.at[index, 'email_provider'] = get_email_provider(mx_record)
except Exception as e:
df.at[index, 'email_provider'] = 'Unknown'
# Save to new CSV
df.to_csv(output_file, index=False)
print(f"Processed data saved to {output_file}")
# User input for file names and column name
input_file = get_valid_filename("Enter the filename of the CSV: ")
df = pd.read_csv(input_file)
email_column = get_valid_column(df, "Enter the name of the email column: ")
output_file = input("Enter the desired output file name: ")
# Process the CSV
process_csv(input_file, email_column, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment