Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Before overwriting keep track of suspect rows with new binary columns
dfX['gps_height_bad'] = (dfX['gps_height']<=0)*1
geos.append('gps_height_bad')
dfX['longitude_bad'] = (dfX['longitude']<25)*1
geos.append('longitude_bad')
dfX['latitude_bad'] = (dfX['latitude']>-0.5)*1
geos.append('latitude_bad')
# Exemple of query via index=basin : mean_geo_df.at['Lake Victoria','latitude']
dfX.loc[dfX['gps_height']<=0, 'gps_height'] = dfX['basin'].apply(lambda x : mean_geo_df.at[x,'gps_height'])
dfX.loc[dfX['longitude']<25, 'longitude'] = dfX['basin'].apply(lambda x : mean_geo_df.at[x,'longitude'])
dfX.loc[dfX['latitude']>-0.5, 'latitude'] = dfX['basin'].apply(lambda x : mean_geo_df.at[x,'latitude'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment