Skip to content

Instantly share code, notes, and snippets.

@daddiofaddio
Last active May 23, 2024 09:22
Show Gist options
  • Save daddiofaddio/19b5d8172976795af3183db1d83c43ee to your computer and use it in GitHub Desktop.
Save daddiofaddio/19b5d8172976795af3183db1d83c43ee to your computer and use it in GitHub Desktop.
Python - Extract & parse raw scraped identification data and reformat to "human name" data values (nameparser/HumanName imports)
from nameparser import HumanName
import pandas as pd
def parse_names_from_file(input_file_path, output_file_path, *, encoding="utf-8"):
    """Parse personal names from a text file and write their parts to a CSV.

    Every line of the input file is handed to ``HumanName`` and one CSV row
    is written per input line, in input order. Lines are not filtered, so
    row *i* of the output always corresponds to line *i* of the input.

    Args:
        input_file_path: Path of the text file holding one name per line.
        output_file_path: Path of the CSV file to create or overwrite.
        encoding: Text encoding used to read the input file. Given
            explicitly so that names with non-ASCII characters are decoded
            the same way on every platform instead of depending on the
            locale default.

    Returns:
        None. The parsed names are written to ``output_file_path``.
    """
    # CSV column name -> HumanName attribute the value is read from.
    # The insertion order here is the column order of the output file.
    fields = {
        'title': 'title',
        'first_name': 'first',
        'middle_name': 'middle',
        'last_name': 'last',
        'suffix': 'suffix',
    }

    # Read the names from a file
    with open(input_file_path, 'r', encoding=encoding) as file:
        names = file.read().splitlines()

    # Parse the names using the nameparser library
    parsed_names = []
    for name in names:
        human_name = HumanName(name)
        parsed_names.append(
            {column: getattr(human_name, attr) for column, attr in fields.items()}
        )

    # Passing the columns explicitly keeps the header row even when the
    # input file is empty; otherwise the DataFrame would have no columns.
    df = pd.DataFrame(parsed_names, columns=list(fields))

    # Write the DataFrame to a CSV file
    df.to_csv(output_file_path, index=False)
# Locations of the raw name list to read and of the CSV file to produce
input_file_path = '/path/to/dir/names.txt'
output_file_path = '/path/to/dir/names_formatted.csv'
# Parse the names in the input file and write the structured result
parse_names_from_file(
    input_file_path=input_file_path,
    output_file_path=output_file_path,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment