-
-
Save BinarySpoon/f257b40a28dab346c6afd7fc7447d001 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the raw tables, drop unusable training rows, and attach address /
# latitude-longitude columns to every ticket.
# NOTE(review): relies on pandas being available as `pd`; the import is not
# visible in this snippet -- confirm it exists earlier in the file/notebook.

# training data
train_data = pd.read_csv('train.csv', encoding='ISO-8859-1')
# test data
test_data = pd.read_csv('test.csv', encoding='ISO-8859-1')

# Keep only training rows whose label is exactly 0 or 1. A comparison with
# NaN is False, so rows with a missing 'compliance' value are dropped too.
train_data = train_data[(train_data['compliance'] == 0) | (train_data['compliance'] == 1)]
# Drop training rows that have no hearing date.
train_data = train_data[~train_data['hearing_date'].isnull()]

# addresses data
address_data = pd.read_csv('addresses.csv', encoding='ISO-8859-1')
# latlon data
latlons_data = pd.read_csv('latlons.csv', encoding='ISO-8859-1')

# Merge addresses with their coordinates, matching on 'address'. The left join
# keeps every row of address_data even when no coordinates are found for it.
address_latlons = address_data.set_index('address').join(
    latlons_data.set_index('address'),
    how='left',
)

# Re-key the address/coordinate table by 'ticket_id' once and reuse it for both
# joins. This replaces the 'address' index, so the address text itself is not
# carried into the merged frames.
ticket_locations = address_latlons.set_index('ticket_id')

# Attach the location columns to train and test data, matching on 'ticket_id'
# (DataFrame.join defaults to a left join, so every ticket row is kept).
train_data = train_data.set_index('ticket_id').join(ticket_locations)
test_data = test_data.set_index('ticket_id').join(ticket_locations)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment