-
-
Save alfard/e1414f9eba2967bc40c0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

# In[1]:

import pandas as pd
import numpy as np
from sklearn import ensemble, feature_extraction, preprocessing
from IPython.core.display import HTML

# Raw bid log for the bidder-classification task (one row per bid).
bids = pd.read_csv('/home/alfard/Facebook-Robot/bids.csv')

# In[2]:

#bids.to_pickle(('/home/alfard/Facebook-Robot/test.pk'))
#bids=pd.read_pickle(('/home/alfard/Facebook-Robot/test.pk'))

# In[3]:

# Slice the bid stream into the three distinct activity days.
# NOTE: bids2 is an alias of bids (no copy is made), so adding the
# 'time2' column below also mutates bids — both names are one frame.
bids2 = bids
# Rescale the obfuscated timestamp to a handier magnitude.
bids2['time2'] = bids2['time'] / 100000000000
# Day windows — presumably chosen from the time2 distribution; TODO confirm.
bids_D1 = bids2[(bids2['time2'] > 96318) & (bids2['time2'] < 96456)]
bids_D2 = bids2[(bids2['time2'] > 96954) & (bids2['time2'] < 97093)]
bids_D3 = bids2[(bids2['time2'] > 97591) & (bids2['time2'] < 97729)]
# BUG FIX: the original ran `del bids` here, but the country/merchandise
# sections further down still read `bids`; executing this file
# top-to-bottom would raise NameError there. Since bids2 aliases the
# same DataFrame, the del freed no memory anyway, so it is removed.
# In[4]:

# Day-1 features, one row per bidder_id.

# Number of bids each bidder placed during day 1.
D1_bid = bids_D1[['bidder_id', 'bid_id']].groupby(['bidder_id']).agg(['count'])

# In[5]:

# Promote the group index to a regular column and flatten the
# MultiIndex column ('bid_id', 'count') down to 'D1_bid'.
D1_bid['bidder_id'] = D1_bid.index
D1_bid.columns = ['D1_bid', 'bidder_id']
D1_bid = D1_bid[['bidder_id', 'D1_bid']]
#display(HTML(D1_bid.to_html()))

# In[6]:

# Number of distinct auctions each bidder took part in on day 1.
nbact = bids_D1[['bidder_id', 'auction']]
D1_auction = nbact.groupby(nbact['bidder_id'])['auction'].apply(lambda x: len(x.unique()))
D1_auction = pd.DataFrame(D1_auction)
D1_auction['bidder_id'] = D1_auction.index
D1_auction.columns = ['D1_auction', 'bidder_id']
D1_auction = D1_auction[['bidder_id', 'D1_auction']]

# In[7]:

# Standard deviation of each bidder's bid timestamps on day 1
# (NaN for bidders with a single bid).
D1_std = bids_D1[['bidder_id', 'time2']].groupby(['bidder_id']).agg(['std'])
D1_std['bidder_id'] = D1_std.index
D1_std.columns = ['D1_std', 'bidder_id']
D1_std = D1_std[['bidder_id', 'D1_std']]

# In[8]:

# Persist the day-1 feature tables.
# BUG FIX: Python-2 `print X` statements converted to the function form
# so the script also runs under Python 3.
print(D1_bid.shape)
D1_bid.to_pickle('/home/alfard/Facebook-Robot/D1_bid.pk')
print(D1_auction.shape)
D1_auction.to_pickle('/home/alfard/Facebook-Robot/D1_auction.pk')
print(D1_std.shape)
D1_std.to_pickle('/home/alfard/Facebook-Robot/D1_std.pk')
# In[9]:

# Day-2 features: same construction as day 1.

# Number of bids each bidder placed during day 2.
D2_bid = bids_D2[['bidder_id', 'bid_id']].groupby(['bidder_id']).agg(['count'])
D2_bid['bidder_id'] = D2_bid.index
D2_bid.columns = ['D2_bid', 'bidder_id']
D2_bid = D2_bid[['bidder_id', 'D2_bid']]

# Number of distinct auctions each bidder took part in on day 2.
nbact = bids_D2[['bidder_id', 'auction']]
D2_auction = nbact.groupby(nbact['bidder_id'])['auction'].apply(lambda x: len(x.unique()))
D2_auction = pd.DataFrame(D2_auction)
D2_auction['bidder_id'] = D2_auction.index
D2_auction.columns = ['D2_auction', 'bidder_id']
D2_auction = D2_auction[['bidder_id', 'D2_auction']]

# Standard deviation of each bidder's bid timestamps on day 2.
D2_std = bids_D2[['bidder_id', 'time2']].groupby(['bidder_id']).agg(['std'])
D2_std['bidder_id'] = D2_std.index
D2_std.columns = ['D2_std', 'bidder_id']
D2_std = D2_std[['bidder_id', 'D2_std']]

# In[10]:

# Persist the day-2 feature tables.
# BUG FIX: Python-2 `print X` statements converted to the function form.
print(D2_bid.shape)
D2_bid.to_pickle('/home/alfard/Facebook-Robot/D2_bid.pk')
print(D2_auction.shape)
D2_auction.to_pickle('/home/alfard/Facebook-Robot/D2_auction.pk')
print(D2_std.shape)
D2_std.to_pickle('/home/alfard/Facebook-Robot/D2_std.pk')
# In[11]:

# Day-3 features: same construction as days 1 and 2.

# Number of bids each bidder placed during day 3.
D3_bid = bids_D3[['bidder_id', 'bid_id']].groupby(['bidder_id']).agg(['count'])
D3_bid['bidder_id'] = D3_bid.index
D3_bid.columns = ['D3_bid', 'bidder_id']
D3_bid = D3_bid[['bidder_id', 'D3_bid']]

# Number of distinct auctions each bidder took part in on day 3.
nbact = bids_D3[['bidder_id', 'auction']]
D3_auction = nbact.groupby(nbact['bidder_id'])['auction'].apply(lambda x: len(x.unique()))
D3_auction = pd.DataFrame(D3_auction)
D3_auction['bidder_id'] = D3_auction.index
D3_auction.columns = ['D3_auction', 'bidder_id']
D3_auction = D3_auction[['bidder_id', 'D3_auction']]

# Standard deviation of each bidder's bid timestamps on day 3.
D3_std = bids_D3[['bidder_id', 'time2']].groupby(['bidder_id']).agg(['std'])
D3_std['bidder_id'] = D3_std.index
D3_std.columns = ['D3_std', 'bidder_id']
D3_std = D3_std[['bidder_id', 'D3_std']]

# In[12]:

# Persist the day-3 feature tables.
# BUG FIX: Python-2 `print X` statements converted to the function form.
print(D3_bid.shape)
D3_bid.to_pickle('/home/alfard/Facebook-Robot/D3_bid.pk')
print(D3_auction.shape)
D3_auction.to_pickle('/home/alfard/Facebook-Robot/D3_auction.pk')
print(D3_std.shape)
D3_std.to_pickle('/home/alfard/Facebook-Robot/D3_std.pk')
# In[14]:

# CLEANUP: drop the per-day slices; they are no longer needed.
del bids_D1, bids_D2, bids_D3, nbact

# In[2]:

# COUNTRY DUMMIES: per-bidder count of bids placed from each country.
country = bids[['bidder_id', 'country']]
country_1 = pd.get_dummies(country['country'])
toto = bids[['bidder_id']]

# In[4]:

# BUG FIX: the original ran `del bids` before this concat, but the
# merchandise/device/ip/url sections below still read `bids`; running
# the file top-to-bottom would raise NameError, so the del is removed.
country_3 = pd.concat([toto, country_1], axis=1)
# http://stackoverflow.com/questions/25145317/pandas-merge-two-dataframes-with-identical-column-names

# In[5]:

del country_1, toto

# In[6]:

# Sum the one-hot columns within each bidder group -> bids per country.
country_4 = country_3.groupby(['bidder_id']).agg(['sum'])

# In[7]:

del country_3
country_4.to_pickle('/home/alfard/Facebook-Robot/country_4.pk')

# In[10]:

# Promote the group index to a column; agg(['sum']) produced MultiIndex
# columns, so the new column is keyed ('bidder_id', '').
country_4['bidder_id'] = country_4.index

# In[13]:

# Flatten the ('bidder_id', '') column name to plain 'bidder_id'.
# BUG FIX: the original applied this identical rename twice in a row;
# the second call was a no-op and is removed.
country_B = country_4.rename(columns={('bidder_id', ''): 'bidder_id'})
# In[2]:

# MERCHANDISE DUMMIES: per-bidder count of bids in each merchandise
# category. One-hot encode the category, attach bidder_id, sum per bidder.
cat_frame = bids[['bidder_id', 'merchandise']]
cat_dummies = pd.get_dummies(cat_frame['merchandise'])
id_col = bids[['bidder_id']]
stacked = pd.concat([id_col, cat_dummies], axis=1)

# In[3]:

# Sum the indicator columns within each bidder group.
grouped = stacked.groupby(['bidder_id']).agg(['sum'])

# In[4]:

# Keep bidder_id both as the index and as a plain column; agg(['sum'])
# left MultiIndex columns, so rename the ('bidder_id', '') key.
grouped['bidder_id'] = grouped.index
merch_B = grouped.rename(columns={('bidder_id', ''): 'bidder_id'})
del cat_frame, cat_dummies, id_col, stacked, grouped
merch_B.to_pickle('/home/alfard/Facebook-Robot/merch_B.pk')
merch_B.shape
# *******************************************************************************************

# In[6]:

# DEVICE DISTINCT: number of distinct devices each bidder used.
pairs = bids[['bidder_id', 'device']]
uniq = pairs.groupby(pairs['bidder_id'])['device'].apply(
    lambda s: len(s.unique()))
device_NB = uniq.to_frame()
# Keep bidder_id both as the index and as a plain first column.
device_NB['bidder_id'] = device_NB.index
device_NB.columns = ['device_NB', 'bidder_id']
device_NB = device_NB[['bidder_id', 'device_NB']]
device_NB.to_pickle('/home/alfard/Facebook-Robot/device_NB.pk')
device_NB.shape
# **************************************************************************

# In[8]:

# IP DISTINCT: number of distinct IP addresses each bidder used.
pairs = bids[['bidder_id', 'ip']]
uniq = pairs.groupby(pairs['bidder_id'])['ip'].apply(
    lambda s: len(s.unique()))
ip_NB = uniq.to_frame()
# Keep bidder_id both as the index and as a plain first column.
ip_NB['bidder_id'] = ip_NB.index
ip_NB.columns = ['ip_NB', 'bidder_id']
ip_NB = ip_NB[['bidder_id', 'ip_NB']]
ip_NB.to_pickle('/home/alfard/Facebook-Robot/ip_NB.pk')
ip_NB.shape
# **********************************************************************

# In[10]:

# URL DISTINCT: number of distinct referrer URLs each bidder bid from.
pairs = bids[['bidder_id', 'url']]
uniq = pairs.groupby(pairs['bidder_id'])['url'].apply(
    lambda s: len(s.unique()))
url_NB = uniq.to_frame()
# Keep bidder_id both as the index and as a plain first column.
url_NB['bidder_id'] = url_NB.index
url_NB.columns = ['url_NB', 'bidder_id']
url_NB = url_NB[['bidder_id', 'url_NB']]
url_NB.to_pickle('/home/alfard/Facebook-Robot/url_NB.pk')
url_NB.shape
# **********************************************************

# In[ ]:

# TODO: merge all feature tables on bidder_id:
#   D1_bid, D1_auction, D1_std,
#   D2_bid, D2_auction, D2_std,
#   D3_bid, D3_auction, D3_std,
#   country_B, merch_B, device_NB, ip_NB, url_NB
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment