Last active
May 23, 2024 09:22
-
-
Save daddiofaddio/19b5d8172976795af3183db1d83c43ee to your computer and use it in GitHub Desktop.
Python - Extract & parse raw scraped identification data and reformat to "human name" data values (nameparser/HumanName imports)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nameparser import HumanName | |
import pandas as pd | |
def parse_names_from_file(input_file_path, output_file_path): | |
# Read the names from a file | |
with open(input_file_path, 'r') as file: | |
names = file.read().splitlines() | |
# Parse the names using the nameparser library | |
parsed_names = [] | |
for name in names: | |
human_name = HumanName(name) | |
parsed_names.append({ | |
'title': human_name.title, | |
'first_name': human_name.first, | |
'middle_name': human_name.middle, | |
'last_name': human_name.last, | |
'suffix': human_name.suffix | |
}) | |
# Convert the parsed names to a DataFrame | |
df = pd.DataFrame(parsed_names) | |
# Write the DataFrame to a CSV file | |
df.to_csv(output_file_path, index=False) | |
# Paths of the input file of names (one per line) and of the CSV to produce.
# These are placeholders; replace them with real locations before running.
input_file_path = '/path/to/dir/names.txt'
output_file_path = '/path/to/dir/names_formatted.csv'

# Run the conversion only when executed as a script, so that importing this
# module (e.g. to reuse parse_names_from_file) does not touch the filesystem.
if __name__ == "__main__":
    parse_names_from_file(input_file_path, output_file_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment