This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Materialise a single day of the raw session data as a working table,
-- replacing any existing table of the same name.
CREATE OR REPLACE TABLE ecommerce_analytics.subset_user_sessions
OPTIONS (
  description = "Obfuscated data from Google merchandise store"
)
AS
SELECT
  *
FROM
  `next-marketing-analytics.ecommerce.all_sessions_raw`
WHERE
  -- NOTE(review): compared against a string literal, so `date` is presumably
  -- a YYYYMMDD STRING column rather than a DATE — confirm against the schema.
  date = '20170101';
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NeuralNet:
    """State for a small network with one hidden layer.

    Holds the training inputs and targets, two randomly initialised weight
    matrices (input -> hidden, hidden -> single output column) and a
    zero-filled output buffer shaped like the targets.
    """

    def __init__(self, x, y, hidden_units=4):
        """Store the data and draw the initial weights.

        Args:
            x: 2-D array of inputs; ``x.shape[1]`` becomes the number of rows
                of ``weights1``.
            y: array of targets; ``output`` is created with the same shape.
            hidden_units: width of the hidden layer, i.e. the number of
                columns of ``weights1`` and rows of ``weights2``. Defaults
                to 4, the value that used to be hard-coded.
        """
        self.input = x
        self.y = y
        # weights1 is drawn before weights2 so that seeded runs keep producing
        # the same initial weights as before.
        self.weights1 = np.random.rand(self.input.shape[1], hidden_units)
        self.weights2 = np.random.rand(hidden_units, 1)
        self.output = np.zeros(y.shape)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw all the values up front: a (rows, 2) block of integers in [0, 10000).
ran_int = np.random.randint(0, 10000, size=(rows, 2))

# The rows are deliberately written one at a time.
for num1, num2 in ran_int:
    pointer['Num1'] = num1
    pointer['Num2'] = num2
    # append() stores the current row and moves the pointer one row forward
    pointer.append()

# Flush the table once every row has been appended.
tab.flush()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of rows the database will hold (2 million).
rows = 2000000

# Target file: replace the placeholder with the folder to write into.
filename = '<path to your folder>' + 'tab.h5'
# `tb` is presumably PyTables imported elsewhere — confirm; opened in 'w' mode.
h5 = tb.open_file(filename, 'w')

# Row layout: two integer columns, in this order.
table_model = dict(
    Num1=tb.IntCol(pos=1),
    Num2=tb.IntCol(pos=2),
)

# complevel=0 -> no compression
filters = tb.Filters(complevel=0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from random import gauss | |
path = '<enter the path where you want to write the file>' | |
a1 = [gauss(1.5, 2) for i in range(1000000)] | |
import pickle | |
pkl_file = open(path + "serialized_data.pkl", 'wb') | |
%time pickle.dump(a1, pkl_file) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def record_comp_time(func_list, data_list, rep=3, number=1): | |
''' Function to compare the performance of different functions. | |
Args: | |
func_list : list | |
list with function names as strings | |
data_list : list | |
list with data set names as strings | |
rep : int | |
number of repetitions of the whole comparison | |
number : int |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Open a figure with figsize (12, 10) for the plot.
plt.subplots(figsize=(12, 10))

# Flatten the per-movie genre lists of the profitable movies into one list.
list1 = [
    genre
    for movie_genres in profit_data['genres']
    for genre in movie_genres
]

# Count each genre, keep the first ten counts, then order them ascending.
genre_counts = pd.Series(list1).value_counts()
genre_count_series = genre_counts[:10].sort_values(ascending=True)
# output looks like
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Turn the text in the genres column into Python lists of genre names.
raw_genres = movies_df['genres']
# Remove the surrounding brackets, then every space and single quote.
# NOTE(review): dropping all spaces would also join multi-word labels
# (e.g. a "Science Fiction" entry would become "ScienceFiction") — confirm
# this is intended.
cleaned_genres = raw_genres.str.strip('[]').str.replace(' ', '').str.replace("'", '')
movies_df['genres'] = cleaned_genres.str.split(',')
# Show the first rows to check the result.
movies_df.head()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the movies with a profit of $50M or more.
is_profitable = movies_df['profit'] >= 50000000
profit_data = movies_df[is_profitable]
# Renumber the selected rows so the index runs 1..N instead of starting at 0.
profit_data.index = range(1, len(profit_data) + 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Index label at which profits_year is largest, i.e. the most profitable year
# (presumably profits_year holds profit indexed by year — confirm).
profits_year.idxmax()
NewerOlder