Skip to content

Instantly share code, notes, and snippets.

View 3vivekb's full-sized avatar

V 3vivekb

  • San Jose, CA
View GitHub Profile
def addr_cleaner(addr):
'''Split up an address based on typical address markers,
because words after the marker tend to mess up geocoding.
addr = '1509 DENTONA PL NEW SFR LOT 3 PLAN B3'
returns - > '1509 DENTONA PL '
'''
address_words = [' AV ', ' DR ', ' PL ', ' ST ', ' WY ', ' RD ', ' LN ',' CT ',' CL ', ' BL ', ' SQ ']
# print(addr)
for ad in address_words:
new_addr = addr.split(ad)
@3vivekb
3vivekb / cleaning_commands.py
Last active January 31, 2019 18:19
Frequently used Python Pandas Data Cleaning and grouping commands
import pandas as pd
import glob
# Load every gps_*.csv in the current directory into its own DataFrame so
# they can be combined into one df.
# glob.glob() returns matches in arbitrary, filesystem-dependent order, so the
# paths are sorted to keep the order of the loaded frames reproducible.
allFiles = sorted(glob.glob('.' + "/gps_*.csv"))
# Empty placeholder; presumably receives the combined result further down
# (not visible in this excerpt) -- TODO confirm.
frame = pd.DataFrame()
# One DataFrame per matching file, in the same order as allFiles.
frames = []
for file_ in allFiles:
    # header=0: the first line of each file supplies the column names.
    df = pd.read_csv(file_, header=0)
    frames.append(df)
@3vivekb
3vivekb / useful_pandas_snippets.py
Created February 11, 2017 00:05 — forked from bsweger/useful_pandas_snippets.md
Useful Pandas Snippets
# List unique values in a DataFrame column.
# Series.unique() returns the distinct values in order of first appearance;
# it replaces pd.unique(Series.ravel()), because Series.ravel() is deprecated
# as of pandas 2.2.
df.column_name.unique()
# Convert Series datatype to numeric, getting rid of any non-numeric values.
# Series.convert_objects() was removed in pandas 1.0; pd.to_numeric with
# errors='coerce' is its replacement and turns unparseable entries into NaN.
# The astype(str) step is kept so every value is parsed from its string form,
# as before.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')
# Keep only the DataFrame rows whose column value is one of the listed values
valuelist = [
    'value1',
    'value2',
    'value3',
]
df = df.loc[df['column'].isin(valuelist)]