Skip to content

Instantly share code, notes, and snippets.

View erykml's full-sized avatar

Eryk Lewinson erykml

View GitHub Profile
import pandas as pd
from fbprophet import Prophet
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_squared_error
# plotting
import matplotlib.pyplot as plt
# settings
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and no longer resolves on newer releases. Use the new name
# when this matplotlib ships it, otherwise fall back to the legacy name so
# older installations keep working.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
class GroupImputer(BaseEstimator, TransformerMixin):
'''
Class used for imputing missing values in a pd.DataFrame using either mean or median of a group.
Parameters
----------
group_cols : list
List of columns used for calculating the aggregated value
target : str
The name of the column to impute
import quandl
# authentication ----
# The key below is a placeholder; the Quandl client reads it from ApiConfig.
quandl_key = 'key' # paste your own API key here :)
quandl.ApiConfig.api_key = quandl_key

# Request the WIKI/MSFT dataset between the two dates given.
df = quandl.get(
    'WIKI/MSFT',
    start_date="2000-01-01",
    end_date="2017-12-31",
)

# Keep only the 'Adj. Close' column and give it a snake_case name.
df = df[['Adj. Close']]
df.columns = ['adj_close']

# create simple and log returns, multiplied by 100 for convenience
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.tsatools import lagmat
import matplotlib.pyplot as plt
# settings
class FrameExtractor():
    '''
    Class used for extracting frames from a video file.

    Parameters
    ----------
    video_path
        Passed unchanged to cv2.VideoCapture (presumably the path of the
        video file, per the description above).
    '''

    def __init__(self, video_path):
        self.video_path = video_path
        # Capture handle opened on the given source and kept on the instance.
        # NOTE(review): the capture is not checked with isOpened(); confirm
        # how callers are expected to handle an unreadable path.
        self.vid_cap = cv2.VideoCapture(video_path)
        # Frame count and frames-per-second as reported by the capture,
        # truncated to integers.
        self.n_frames = int(self.vid_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.fps = int(self.vid_cap.get(cv2.CAP_PROP_FPS))
# Seed NumPy's global random generator so results are reproducible.
np.random.seed(42)

# Daily DatetimeIndex from 2017-01-01 through 2020-12-30 (both ends included).
range_of_dates = pd.date_range(start="2017-01-01", end="2020-12-30")

# Start from an empty frame indexed by those dates ...
X = pd.DataFrame(index=range_of_dates)

# ... and number the days consecutively, starting at 0.
X["day_nr"] = range(len(range_of_dates))
# Tickers to request.
assets = ['TSLA', 'MSFT', 'FB']

# One client for all tickers, then the price history for the given date
# window at a weekly interval.
yahoo_financials = YahooFinancials(assets)
data = yahoo_financials.get_historical_price_data(
    start_date='2019-01-01',
    end_date='2019-12-31',
    time_interval='weekly',
)
prices_df = pd.DataFrame({
a: {x['formatted_date']: x['adjclose'] for x in data[a]['prices']} for a in assets
# Build the Theil-Sen estimator with a fixed random_state, then fit it on
# (X, y); fit() returns the estimator itself, so the two-step form is equivalent.
theilsen = TheilSenRegressor(random_state=42)
theilsen.fit(X, y)

# Store the predictions for plotline_X under this model's column name.
fit_df["theilsen_regression"] = theilsen.predict(plotline_X)

# Record the model name together with its first coefficient.
coef_list.append(
    ["theilsen_regression", theilsen.coef_[0]]
)
# Draw the points selected by each mask on the same axes:
# inliers in blue, outliers in red.
plt.scatter(
    X[inlier_mask],
    y[inlier_mask],
    color="blue",
    label="Inliers",
)
plt.scatter(
    X[outlier_mask],
    y[outlier_mask],
    color="red",
    label="Outliers",
)
# Trailing semicolon kept as in the original (presumably to suppress the
# echoed return value in a notebook cell).
plt.title("RANSAC - outliers vs inliers");
# Boolean mask of the samples the estimator kept as inliers; its negation
# marks the outliers.
inlier_mask = ransac.inlier_mask_
outlier_mask = ~inlier_mask

# Report the total number of flagged samples, then how many of the first
# N_OUTLIERS samples were flagged.
print(f"Total outliers: {outlier_mask.sum()}")
print(f"Outliers we have added ourselves: {outlier_mask[:N_OUTLIERS].sum()} / {N_OUTLIERS}")