This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from itertools import combinations | |
import re | |
class MultiSourceFieldComparator: | |
def __init__(self, df, fields_to_compare, config=None): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
US Address Standardizer for DataFrame Merging (Enhanced Version)
Standardizes address columns in pandas DataFrames to enable consistent merging.
Handles various address formats, reduces ZIP codes to 5 digits, and includes fuzzy matching.
Requirements:
    pip install pandas usaddress fuzzywuzzy python-levenshtein
""" | |
import pandas as pd | |
import usaddress |