Skip to content

Instantly share code, notes, and snippets.

@alfard
Created June 3, 2015 20:22
Show Gist options
  • Select an option

  • Save alfard/e1414f9eba2967bc40c0 to your computer and use it in GitHub Desktop.

Select an option

Save alfard/e1414f9eba2967bc40c0 to your computer and use it in GitHub Desktop.
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
from sklearn import ensemble, feature_extraction, preprocessing
from IPython.core.display import HTML
# Load the raw competition bids table.  Columns used below:
# bid_id, bidder_id, auction, merchandise, device, time, country, ip, url.
bids = pd.read_csv('/home/alfard/Facebook-Robot/bids.csv')
# In[2]:
# Optional pickle round-trip to skip re-parsing the CSV on later runs:
#bids.to_pickle(('/home/alfard/Facebook-Robot/test.pk'))
#bids=pd.read_pickle(('/home/alfard/Facebook-Robot/test.pk'))
# Day 1: rescale the raw timestamps to a coarser unit so the three
# "days" of activity in the data can be isolated with range filters.
# NOTE: bids2 = bids is an alias, not a copy, so the 'time2' column is
# added to the original frame as well; later sections only read the
# original columns, so that is harmless.
bids2 = bids
bids2['time2'] = bids2['time']/100000000000
# Three disjoint time windows = the three days of bidding activity
# (the gaps between them contain no bids).
bids_D1 = bids2[(bids2['time2'] > 96318) & (bids2['time2'] < 96456)]
bids_D2 = bids2[(bids2['time2'] > 96954) & (bids2['time2'] < 97093)]
bids_D3 = bids2[(bids2['time2'] > 97591) & (bids2['time2'] < 97729)]
# BUG FIX: the original did `del bids` here, but the country /
# merchandise / device / ip / url sections below still read `bids`,
# so running this file top-to-bottom raised NameError.  Drop only the
# redundant alias and keep `bids` alive.
del bids2
# ----- Day 1 per-bidder features ------------------------------------
# Bid volume: how many bids each bidder placed during day 1.
D1_bid = bids_D1[['bidder_id', 'bid_id']].groupby(['bidder_id']).agg(['count'])
D1_bid['bidder_id'] = D1_bid.index          # materialise the index as a column
D1_bid.columns = ['D1_bid', 'bidder_id']    # flatten the MultiIndex from agg()
D1_bid = D1_bid[['bidder_id', 'D1_bid']]

# Auction breadth: number of distinct auctions each bidder touched.
day1_pairs = bids_D1[['bidder_id', 'auction']]
D1_auction = pd.DataFrame(
    day1_pairs.groupby(day1_pairs['bidder_id'])['auction'].apply(
        lambda col: len(col.unique())))
D1_auction['bidder_id'] = D1_auction.index
D1_auction.columns = ['D1_auction', 'bidder_id']
D1_auction = D1_auction[['bidder_id', 'D1_auction']]

# Timing spread: standard deviation of each bidder's bid times.
D1_std = bids_D1[['bidder_id', 'time2']].groupby(['bidder_id']).agg(['std'])
D1_std['bidder_id'] = D1_std.index
D1_std.columns = ['D1_std', 'bidder_id']
D1_std = D1_std[['bidder_id', 'D1_std']]

# Persist the day-1 feature tables for the final merge step.
print(D1_bid.shape)
D1_bid.to_pickle('/home/alfard/Facebook-Robot/D1_bid.pk')
print(D1_auction.shape)
D1_auction.to_pickle('/home/alfard/Facebook-Robot/D1_auction.pk')
print(D1_std.shape)
D1_std.to_pickle('/home/alfard/Facebook-Robot/D1_std.pk')
# ----- Day 2 per-bidder features (same recipe as day 1) -------------
# Bid count per bidder.
D2_bid = bids_D2[['bidder_id', 'bid_id']].groupby(['bidder_id']).agg(['count'])
D2_bid['bidder_id'] = D2_bid.index
D2_bid.columns = ['D2_bid', 'bidder_id']
D2_bid = D2_bid[['bidder_id', 'D2_bid']]

# Distinct auctions per bidder.
nbact = bids_D2[['bidder_id', 'auction']]
D2_auction = pd.DataFrame(
    nbact.groupby(nbact['bidder_id'])['auction'].apply(
        lambda a: len(a.unique())))
D2_auction['bidder_id'] = D2_auction.index
D2_auction.columns = ['D2_auction', 'bidder_id']
D2_auction = D2_auction[['bidder_id', 'D2_auction']]

# Standard deviation of bid times per bidder.
D2_std = bids_D2[['bidder_id', 'time2']].groupby(['bidder_id']).agg(['std'])
D2_std['bidder_id'] = D2_std.index
D2_std.columns = ['D2_std', 'bidder_id']
D2_std = D2_std[['bidder_id', 'D2_std']]

# Persist the day-2 feature tables.
print(D2_bid.shape)
D2_bid.to_pickle('/home/alfard/Facebook-Robot/D2_bid.pk')
print(D2_auction.shape)
D2_auction.to_pickle('/home/alfard/Facebook-Robot/D2_auction.pk')
print(D2_std.shape)
D2_std.to_pickle('/home/alfard/Facebook-Robot/D2_std.pk')
# ----- Day 3 per-bidder features (same recipe as days 1 and 2) ------
# Bid count per bidder.
D3_bid = bids_D3[['bidder_id', 'bid_id']].groupby(['bidder_id']).agg(['count'])
D3_bid['bidder_id'] = D3_bid.index
D3_bid.columns = ['D3_bid', 'bidder_id']
D3_bid = D3_bid[['bidder_id', 'D3_bid']]

# Distinct auctions per bidder.
nbact = bids_D3[['bidder_id', 'auction']]
D3_auction = pd.DataFrame(
    nbact.groupby(nbact['bidder_id'])['auction'].apply(
        lambda a: len(a.unique())))
D3_auction['bidder_id'] = D3_auction.index
D3_auction.columns = ['D3_auction', 'bidder_id']
D3_auction = D3_auction[['bidder_id', 'D3_auction']]

# Standard deviation of bid times per bidder.
D3_std = bids_D3[['bidder_id', 'time2']].groupby(['bidder_id']).agg(['std'])
D3_std['bidder_id'] = D3_std.index
D3_std.columns = ['D3_std', 'bidder_id']
D3_std = D3_std[['bidder_id', 'D3_std']]

# Persist the day-3 feature tables.
print(D3_bid.shape)
D3_bid.to_pickle('/home/alfard/Facebook-Robot/D3_bid.pk')
print(D3_auction.shape)
D3_auction.to_pickle('/home/alfard/Facebook-Robot/D3_auction.pk')
print(D3_std.shape)
D3_std.to_pickle('/home/alfard/Facebook-Robot/D3_std.pk')

# Free the per-day slices; only the aggregated tables are needed now.
del bids_D1,bids_D2,bids_D3,nbact
# ----- Country dummies per bidder -----------------------------------
# One-hot encode the country of each individual bid, then sum the
# dummies per bidder: for every bidder, how many of their bids came
# from each country.
country = bids[['bidder_id', 'country']]
country_1 = pd.get_dummies(country['country'])
toto = bids[['bidder_id']]
# BUG FIX: the original did `del bids` at this point, but the
# merchandise / device / ip / url sections below still read `bids`;
# run top-to-bottom that raised NameError, so `bids` is kept alive.
# (Concatenating an id column with the dummy frame, cf.
# http://stackoverflow.com/questions/25145317)
country_3 = pd.concat([toto, country_1], axis=1)
del country_1, toto
country_4 = country_3.groupby(['bidder_id']).agg(['sum'])
del country_3
country_4.to_pickle('/home/alfard/Facebook-Robot/country_4.pk')
country_4['bidder_id'] = country_4.index
# agg() produced MultiIndex column labels; give the freshly added id
# column a flat name so later merges can use it.  The original applied
# this rename twice -- the second call was a no-op and was removed,
# along with dead notebook-inspection expressions (list(...), .head()).
country_B = country_4.rename(columns={('bidder_id', ''): 'bidder_id'})
# ----- Merchandise dummies per bidder -------------------------------
# One-hot the merchandise category of each bid, then total per bidder.
merch_pairs = bids[['bidder_id', 'merchandise']]
merch_dummies = pd.get_dummies(merch_pairs['merchandise'])
ids_only = bids[['bidder_id']]
merch_wide = pd.concat([ids_only, merch_dummies], axis=1)
merch_summed = merch_wide.groupby(['bidder_id']).agg(['sum'])
merch_summed['bidder_id'] = merch_summed.index
# Flatten the ('bidder_id', '') MultiIndex label to a plain name.
merch_B = merch_summed.rename(columns={('bidder_id', ''): 'bidder_id'})
del merch_pairs, merch_dummies, ids_only, merch_wide, merch_summed
merch_B.to_pickle('/home/alfard/Facebook-Robot/merch_B.pk')
merch_B.shape
# *******************************************************************************************
# ----- Distinct devices per bidder ----------------------------------
# Count how many different devices each bidder bid from.
nbdev = bids[['bidder_id', 'device']]
nbdev.shape
device_NB = pd.DataFrame(
    nbdev.groupby(nbdev['bidder_id'])['device'].apply(
        lambda d: len(d.unique())))
device_NB['bidder_id'] = device_NB.index
device_NB.columns = ['device_NB', 'bidder_id']
device_NB = device_NB[['bidder_id', 'device_NB']]
device_NB.to_pickle('/home/alfard/Facebook-Robot/device_NB.pk')
device_NB.shape
# **************************************************************************
# ----- Distinct IPs per bidder --------------------------------------
# Count how many different IP addresses each bidder bid from.
nbip = bids[['bidder_id', 'ip']]
nbip.shape
ip_NB = pd.DataFrame(
    nbip.groupby(nbip['bidder_id'])['ip'].apply(
        lambda addr: len(addr.unique())))
ip_NB['bidder_id'] = ip_NB.index
ip_NB.columns = ['ip_NB', 'bidder_id']
ip_NB = ip_NB[['bidder_id', 'ip_NB']]
ip_NB.to_pickle('/home/alfard/Facebook-Robot/ip_NB.pk')
ip_NB.shape
# **********************************************************************
# ----- Distinct URLs per bidder -------------------------------------
# Count how many different referrer URLs each bidder bid through.
nburl = bids[['bidder_id', 'url']]
nburl.shape
url_NB = pd.DataFrame(
    nburl.groupby(nburl['bidder_id'])['url'].apply(
        lambda u: len(u.unique())))
url_NB['bidder_id'] = url_NB.index
url_NB.columns = ['url_NB', 'bidder_id']
url_NB = url_NB[['bidder_id', 'url_NB']]
url_NB.to_pickle('/home/alfard/Facebook-Robot/url_NB.pk')
url_NB.shape
# **********************************************************
# In[ ]:
#Merge all table
#D1_bid
#D1_auction
#D1_std
#D2_bid
#D2_auction
#D2_std
#D3_bid
#D3_auction
#D3_std
#country_B
#merch_B
#device_NB
#ip_NB
#url_NB
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment