Skip to content

Instantly share code, notes, and snippets.

@dkhurana1306
Created August 27, 2016 00:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dkhurana1306/facff5d03fd7a5bdf939b15432ad925b to your computer and use it in GitHub Desktop.
import re
import pandas as pd
import numpy as np
# Load the items CSV into a DataFrame.
# NOTE(review): hard-coded absolute path -- only resolves on the original
# author's machine; confirm where items.csv should live.
df = pd.read_csv('/Users/dk1306/tutorial/tutorial/items.csv')

# The bare expressions below have no effect when run as a script; they only
# display a result in an interactive session (REPL / notebook).
df.keys()
df.shape

# Missing-value audit. The axis is spelled out on the DataFrame reduction
# instead of going through np.sum(df), whose implicit axis=None behaviour is
# deprecated in pandas and slated to return a single scalar.
df.isnull().sum(axis=0)  # missing values each column
df.isnull().sum(axis=1)  # missing values each row
df.isnull().any(axis=1)
df.loc[df.isnull().any(axis=1), :]

# Keep only rows in which every column is populated.
df = df.dropna(axis=0, how='any')
df.shape
df.describe()
df.groupby('category').size()
##################################################
import datetime
def date_days(DATE, today=None):
    """Return the number of days between the date in ``DATE`` and ``today``.

    The first eight characters of ``DATE`` are discarded (presumably a fixed
    label such as ``"Created "`` -- confirm against the scraped data) and the
    remainder is parsed with the format ``'%B %d, %Y'``, for example
    ``"August 27, 2016"``.

    Args:
        DATE: Raw date string; everything from index 8 onward must match
            ``'%B %d, %Y'``.
        today: Optional ``datetime.date`` to measure against. Defaults to
            ``datetime.date.today()``; pass a fixed date for reproducible
            results.

    Returns:
        Whole days from the parsed date to ``today`` (negative when the
        parsed date lies after ``today``).

    Raises:
        ValueError: If the text after the first eight characters does not
            match ``'%B %d, %Y'``.
    """
    if today is None:
        today = datetime.date.today()
    parsed = datetime.datetime.strptime(DATE[8:], '%B %d, %Y').date()
    return (today - parsed).days
# Series.map is used instead of the builtin map(): on Python 3 the builtin
# returns a lazy iterator with no length, which cannot be assigned as a
# column. Series.map also keeps the result aligned with the row index.
df['days'] = df['date'].map(date_days)
##################################################
# Strip thousands separators and convert the text to int.
df['people'] = df['people'].map(lambda a: int(a.replace(',', '')))
##################################################
def current_number(current):
    """Convert the text of a ``current`` value to an integer.

    Thousands separators (``,``) are removed first. A value below 10 is
    multiplied by 1000; any other value is converted with ``int`` as is.

    NOTE(review): the "< 10 means thousands" rule is a heuristic --
    presumably the source abbreviates large amounts (e.g. ``"1.5"`` for
    1,500); confirm against the scraped data.

    Args:
        current: Numeric text, optionally containing ``,`` separators.

    Returns:
        The value as an ``int``.

    Raises:
        ValueError: If the text is not numeric, or is 10 or more and not a
            whole number.
    """
    cleaned = current.replace(',', '')
    amount = float(cleaned)
    if amount < 10:
        # round() before int(): plain truncation turns binary float error
        # such as 1.005 * 1000 == 1004.9999999999999 into 1004.
        return int(round(amount * 1000))
    return int(cleaned)
# Series.map rather than builtin map(): the builtin is a lazy iterator on
# Python 3 and cannot be assigned as a column.
df['current'] = df['current'].map(current_number)
##################################################
def shares_number(shares):
    """Convert the text of a ``shares`` value to an integer.

    The first two characters are discarded (presumably a fixed prefix in the
    scraped text -- confirm against the data). A trailing ``K`` marks
    thousands, so ``"..1.2K"`` becomes 1200; anything else is converted with
    ``int`` directly.

    Args:
        shares: Raw share-count text with a two-character prefix.

    Returns:
        The share count as an ``int``.

    Raises:
        ValueError: If the remaining text is empty or not numeric.
    """
    count = shares[2:]
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if count.endswith('K'):
        # round() before int() so float error (1.005 * 1000 ==
        # 1004.9999999999999) does not truncate to the wrong integer.
        return int(round(float(count[:-1]) * 1000))
    return int(count)
# Series.map rather than builtin map(): the builtin is a lazy iterator on
# Python 3 and cannot be assigned as a column.
df['shares'] = df['shares'].map(shares_number)
##################################################
def target_selection(target):
    """Return ``target`` without its first five and last five characters.

    NOTE(review): presumably this trims fixed wrapper text around the goal
    amount in the scraped field -- confirm against the data.

    Args:
        target: Raw text of the target field.

    Returns:
        The inner substring; empty when ``target`` has ten characters or
        fewer.
    """
    return target[5:-5]
def target_number(target):
    """Convert the text of a ``target`` amount to an integer.

    Processing steps:
      1. If the text starts with ``'\\x82'``, every ``'\\x82\\xac'`` is removed;
         if it then starts with ``'\\xa3'``, every ``'\\xa3'`` is removed.
         NOTE(review): these look like remnants of a euro / pound sign left
         over from earlier slicing -- confirm against the data.
      2. Thousands separators (``,``) are removed.
      3. A trailing ``k`` scales by 1,000 and a trailing ``M`` by 1,000,000.
      4. Otherwise a value below 10 is multiplied by 1000 (the same heuristic
         the file applies to ``current``); any other value is converted with
         ``int`` as is.

    Args:
        target: Amount text, e.g. ``"10k"``, ``"1.5M"``, ``"12,000"``.

    Returns:
        The amount as an ``int``.

    Raises:
        ValueError: If the text is empty or not numeric after cleaning.
    """
    # startswith()/endswith() are safe on empty strings, unlike [0] / [-1].
    if target.startswith('\x82'):
        target = target.replace('\x82\xac', '')
    if target.startswith('\xa3'):
        target = target.replace('\xa3', '')

    # Drop separators up front so suffixed values such as "1,500k" parse too.
    cleaned = target.replace(',', '')

    # round() before int() throughout: plain truncation turns float error
    # such as 1.005 * 1000 == 1004.9999999999999 into 1004.
    if cleaned.endswith('k'):
        return int(round(float(cleaned[:-1]) * 1000))
    if cleaned.endswith('M'):
        return int(round(float(cleaned[:-1]) * 1000000))

    amount = float(cleaned)
    if amount < 10:
        return int(round(amount * 1000))
    return int(cleaned)
# Chain Series.map instead of nesting builtin map(): on Python 3 the builtin
# yields a lazy iterator that cannot be assigned as a column. The order is
# unchanged: trim the wrapper text first, then parse the amount.
df['target'] = df['target'].map(target_selection).map(target_number)
##################################################
# Absolute gap between target and current, as a percentage of the target.
gap = df['target'] - df['current']
df['percent_complt'] = abs(gap / df['target']) * 100
##################################################
# Mean amount per person.
df['avg_contrb'] = df['current'] / df['people']
##################################################
df.describe()

# Assemble the export frame from the selected columns, in this order.
export_columns = [
    'category',
    'target',
    'current',
    'percent_complt',
    'people',
    'avg_contrb',
    'shares',
    'days',
]
data = pd.concat(
    [df[column] for column in export_columns],
    axis=1,
    keys=export_columns,
)
data.to_csv('gofundme.csv')
df.describe()
data.head(5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment