Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Build the stop-word set and the stemmer once: both are loop-invariant, and
# rebuilding the set for every single word makes the clean-up needlessly slow.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()

# One cleaned, stemmed, space-joined string per row of yelp['text'].
corpus = []
for text in yelp['text'].values:
    # Replace every non-letter character with a space, then normalise case.
    review = re.sub('[^a-zA-Z]', ' ', text)
    review = review.lower()
    review = review.split()
    # Drop English stop words and stem the remaining tokens.
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = ' '.join(review)
    # Collect the cleaned review; without this the corpus list stays empty.
    corpus.append(review)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def get_quantile_outliers(series): | |
outliers_dic = {} | |
outliers_list = [] | |
iqr = series.quantile(0.75) - series.quantile(0.25) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert_dates(df, date_column, replace_index, time_zone='CET'):
    """Parse a column as timestamps and convert it to ``time_zone``.

    The column is parsed with ``pd.to_datetime(..., utc=True)`` and then
    converted to ``time_zone``. The frame is modified in place and the same
    object is returned.

    Args:
        df: DataFrame holding the column to convert (mutated in place).
        date_column: Name of the column to parse.
        replace_index: When true, the converted column also becomes the
            frame's index (the column itself is kept).
        time_zone: Target time zone passed to ``tz_convert``.

    Returns:
        The same ``df`` object, with ``date_column`` converted.
    """
    df[date_column] = pd.to_datetime(df[date_column], utc=True).dt.tz_convert(time_zone)
    if replace_index:
        df.index = df[date_column]
    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse the 'Created at' timestamps (converted to CET by default) and index
# the frame by them so it can be resampled by calendar day.
df_clean = convert_dates(df_clean, 'Created at', replace_index=True)

# Sum only the 'Total' column per day. Summing the whole frame would also try
# to sum the datetime column, which raises on recent pandas versions, and only
# 'Total' survives into the final frame anyway.
df_clean = df_clean[['Total']].resample('D').sum()

# Take the calendar date in the index's own time zone. Going through
# ``index.values`` yields UTC instants, which moves every local-midnight bin
# to the previous day.
df_clean['ds'] = df_clean.index.date
df_clean = df_clean.reset_index(drop=True)

# Prophet expects exactly the columns 'ds' (date) and 'y' (value).
df_clean = df_clean.rename(columns={'Total': 'y'})
df_clean = df_clean[['ds', 'y']]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Interquartile-range outlier filter.
# Quartiles and comparisons are restricted to the numeric columns: computing
# them over date/object columns raises a TypeError on recent pandas versions.
numeric = df.select_dtypes(include='number')
Q1 = numeric.quantile(0.25)
Q3 = numeric.quantile(0.75)
IQR = Q3 - Q1

# A row is an outlier when any numeric value lies more than 1.5 * IQR outside
# the [Q1, Q3] range of its column; keep all columns of the remaining rows.
is_outlier = ((numeric < (Q1 - 1.5 * IQR)) | (numeric > (Q3 + 1.5 * IQR))).any(axis=1)
df_quantile = df[~is_outlier]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The package was renamed from ``fbprophet`` to ``prophet``; try the current
# name first and fall back to the legacy one so either install works.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Baseline model: yearly seasonality only, fitted on the outlier-filtered data.
baseline = Prophet(yearly_seasonality=True, weekly_seasonality=False)
baseline.fit(df_quantile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Holiday frame for the summer sales window, one row per day.
# ISO dates are used on purpose: '1/07/2018' is parsed month-first (7 January)
# while '15/08/2018' can only be parsed day-first, so the slash-separated
# form silently produced a window starting in January.
summer_sales = pd.DataFrame({
    'holiday': 'summer_sales',
    'ds': pd.date_range(start='2018-07-01', end='2018-08-15'),
    'lower_window': -5,
    'upper_window': 0,
})
black_friday = pd.DataFrame({ | |
'holiday': 'black_friday', | |
'ds': pd.date_range(start='23/11/2018', end='23/11/2018'), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Seasonal model: yearly and weekly seasonality plus a custom 60-day
# ('bim') component and the built-in Italian public holidays.
m_seas = Prophet(yearly_seasonality=True, weekly_seasonality=True)
m_seas.add_seasonality(name='bim', period=60, fourier_order=2)
m_seas.add_country_holidays(country_name='IT')
m_seas.fit(df_quantile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Model with yearly and weekly seasonality, a seasonality prior scale of 5,
# three custom seasonal components and the built-in Italian public holidays.
m_lr = Prophet(yearly_seasonality=True, weekly_seasonality=True, seasonality_prior_scale=5)
# Component names match their periods: 'bim' is the 60-day component (as in
# the m_seas model) and 'quarter' the 90-day one. The set of fitted periods
# (60, 90, 30.5) is unchanged; only the labels were swapped.
m_lr.add_seasonality(name='bim', period=60, fourier_order=2)
m_lr.add_seasonality(name='quarter', period=90, fourier_order=2)
m_lr.add_seasonality(name='m', period=30.5, fourier_order=2)
m_lr.add_country_holidays(country_name='IT')
m_lr.fit(df_quantile)
Older | Newer