Skip to content

Instantly share code, notes, and snippets.

View robsannaa's full-sized avatar
🎯
Focusing

robsanna robsannaa

🎯
Focusing
  • Poland
View GitHub Profile
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Normalise every Yelp review: keep letters only, lowercase, drop English
# stop words, stem what is left and re-join the tokens into a single string.
corpus = []

# Build the stop-word set and the stemmer once. Re-creating the set for every
# word (as a per-word `set(stopwords.words('english'))` does) repeats the same
# work for each token of each review.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()

for text in yelp['text'].values:
    # Every character outside a-z / A-Z becomes a space, so punctuation and
    # digits act as token separators for the whitespace split below.
    tokens = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    review = ' '.join(
        ps.stem(word) for word in tokens if word not in english_stopwords
    )
    # NOTE(review): the visible snippet never appends `review` to `corpus`;
    # the append presumably follows beyond this excerpt — confirm.
@robsannaa
robsannaa / outliers.py
Created March 17, 2019 10:21
Two very simple functions that estimate the number of outliers according to the 3-sigma rule and the quantile (IQR) rule.
import numpy as np
import pandas as pd
def get_quantile_outliers(series):
outliers_dic = {}
outliers_list = []
iqr = series.quantile(0.75) - series.quantile(0.25)
def convert_dates(df, date_column, replace_index, time_zone='CET'):
    """Parse a column as timezone-aware datetimes, optionally using it as index.

    The column is parsed with ``utc=True`` and then converted to ``time_zone``.
    ``df`` is modified in place and the same object is returned.

    Args:
        df: DataFrame holding the column to convert.
        date_column: Name of the column containing the date values.
        replace_index: When truthy, the converted column also becomes the
            DataFrame index (the column itself is kept).
        time_zone: Target time zone passed to ``tz_convert``.

    Returns:
        The same DataFrame, with ``date_column`` converted.
    """
    df[date_column] = pd.to_datetime(df[date_column], utc=True).dt.tz_convert(time_zone)
    if replace_index:
        df.index = df[date_column]
    return df
# Build the two-column frame (`ds` = calendar day, `y` = daily total) from the
# raw records: localise the timestamps, aggregate per day, then flatten.
df_clean = convert_dates(df_clean, 'Created at', replace_index=True)

# Only `Total` survives the final column selection, so sum just that column;
# this also avoids asking pandas to sum the datetime column itself.
df_clean = df_clean[['Total']].resample('D').sum()

# Take the date straight from the tz-aware index. Going through
# `index.values` converts to naive UTC instants first, which moves each local
# midnight onto the previous calendar day.
df_clean['ds'] = df_clean.index.date

# Replace the datetime index with a plain 0..n-1 index.
df_clean = df_clean.reset_index(drop=True)
df_clean = df_clean.rename(columns={'Total': 'y'})
df_clean = df_clean[['ds', 'y']]
@robsannaa
robsannaa / iqr.py
Last active November 28, 2019 08:48
# Drop every row in which at least one column lies outside the 1.5 x IQR
# fences computed per column.
Q1, Q3 = df.quantile(0.25), df.quantile(0.75)
IQR = Q3 - Q1

lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# Element-wise flags: True where a value falls below or above its column fence.
is_outlier = df.lt(lower_fence) | df.gt(upper_fence)

# Keep only the rows with no flagged value.
df_quantile = df[~is_outlier.any(axis=1)]
from fbprophet import Prophet
# Baseline model: yearly seasonality on, weekly seasonality off, fitted on the
# outlier-filtered frame.
baseline_options = {'yearly_seasonality': True, 'weekly_seasonality': False}
baseline = Prophet(**baseline_options)
baseline.fit(df_quantile)
@robsannaa
robsannaa / holidays_fit.py
Created November 14, 2019 20:28
holidays.py
# Holiday frame for the summer sales period: one row per day from 1 July to
# 15 August 2018, each with a 5-day lead-in window and no trailing window.
# ISO dates are used because slash-separated strings are ambiguous:
# '1/07/2018' is read month-first (7 January) whereas '15/08/2018' can only be
# read day-first, which silently stretched the range back to January.
summer_sales = pd.DataFrame({
    'holiday': 'summer_sales',
    'ds': pd.date_range(start='2018-07-01', end='2018-08-15'),
    'lower_window': -5,
    'upper_window': 0,
})
black_friday = pd.DataFrame({
'holiday': 'black_friday',
'ds': pd.date_range(start='23/11/2018', end='23/11/2018'),
# Seasonal model: yearly + weekly seasonality, an extra 60-day ('bim') cycle
# and the built-in holidays for country code 'IT', fitted on the
# outlier-filtered frame.
bimonthly_cycle = {'name': 'bim', 'period': 60, 'fourier_order': 2}

m_seas = Prophet(weekly_seasonality=True, yearly_seasonality=True)
m_seas.add_seasonality(**bimonthly_cycle)
m_seas.add_country_holidays(country_name='IT')
m_seas.fit(df_quantile)
# Richer seasonal model: yearly + weekly seasonality with a seasonality prior
# scale of 5, plus three custom cycles and the built-in holidays for country
# code 'IT', fitted on the outlier-filtered frame.
m_lr = Prophet(yearly_seasonality=True, weekly_seasonality=True, seasonality_prior_scale=5)

# Labels now match their periods (and the 'bim' = 60 days used by the other
# seasonal model); previously 'bim' was attached to 90 days and 'quarter' to
# 60 days. The periods and the order in which they are added are unchanged.
m_lr.add_seasonality(name='quarter', period=90, fourier_order=2)
m_lr.add_seasonality(name='bim', period=60, fourier_order=2)
m_lr.add_seasonality(name='m', period=30.5, fourier_order=2)
m_lr.add_country_holidays(country_name='IT')
m_lr.fit(df_quantile)