Skip to content

Instantly share code, notes, and snippets.

View sachinsdate's full-sized avatar
💭
Up to my ears in regression modeling

sachinsdate

💭
Up to my ears in regression modeling
View GitHub Profile
@sachinsdate
sachinsdate / auto_arima_py
Last active April 29, 2019 10:54
Auto Arima
import argparse
import numpy as np
import pmdarima as pm
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import t
def auto_arima(in_csv_file_path):
print('IN File==>' + in_csv_file_path)
df = pd.read_csv(in_csv_file_path, header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
@sachinsdate
sachinsdate / tb.csv
Created April 29, 2019 10:50
Tuberculosis incidence in the United States 1974 to 2017. Source: CDC
Year Number of Cases
1974 30122
1975 33989
1976 32105
1977 30145
1978 28521
1979 27669
1980 27749
1981 27373
1982 25520
@sachinsdate
sachinsdate / linear_regression.py
Created June 19, 2019 10:49
Linear Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
df = pd.read_csv('uciml_auto_city_highway_mpg.csv', header=0)
#Plot the original data set
df.plot.scatter(x='City MPG', y='Highway MPG')
plt.show()
@sachinsdate
sachinsdate / uciml_auto_city_highway_mpg.csv
Created June 19, 2019 10:54
A subset of the UC Irvine Autos dataset
City MPG Highway MPG
21 27
21 27
19 26
24 30
18 22
19 25
19 25
19 25
17 20
import matplotlib.pyplot as plt
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import seaborn as sns
df = pd.read_csv('boston_monthly_tmax_1998_2019.csv', header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
df.plot(marker='.')
plt.show()
Date Monthly Average Maximum
1/15/1998 39.71
2/15/1998 40.97
3/15/1998 48.75
4/15/1998 56.74
5/15/1998 68.75
6/15/1998 72
7/15/1998 82.62
8/15/1998 80.2
9/15/1998 74.44
import pandas as pd
from sklearn import linear_model
#Read the data into a pandas DataFrame
df = pd.read_csv('southern_osc.csv', header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
#add two columns containing the LAG=1 and LAG=2 version of the data to the DataFrame
df['T_(i-1)'] = df['T_i'].shift(1)
df['T_(i-2)'] = df['T_i'].shift(2)
Date T_i
Jan-51 1.5
Feb-51 0.9
Mar-51 -0.1
Apr-51 -0.3
May-51 -0.7
Jun-51 0.2
Jul-51 -1
Aug-51 -0.2
Sep-51 -1.1
@sachinsdate
sachinsdate / poisson_sim.py
Last active September 19, 2019 10:47
Simulate a Poisson process
import random
import math
_lambda = 5
_num_arrivals = 100
_arrival_time = 0
print('RAND,INTER_ARRV_T,ARRV_T')
for i in range(_num_arrivals):
@sachinsdate
sachinsdate / nyc_bb_bicyclist_counts.csv
Last active June 23, 2023 06:03
Daily total of bike counts conducted on the Brooklyn Bridge from 01 April 2017 to 31 October 2017. Source: NYC Open Data: Bicycle Counts for East River Bridges
Date HIGH_T LOW_T PRECIP BB_COUNT
1-Apr-17 46.00 37.00 0.00 606
2-Apr-17 62.10 41.00 0.00 2021
3-Apr-17 63.00 50.00 0.03 2470
4-Apr-17 51.10 46.00 1.18 723
5-Apr-17 63.00 46.00 0.00 2807
6-Apr-17 48.90 41.00 0.73 461
7-Apr-17 48.00 43.00 0.01 1222
8-Apr-17 55.90 39.90 0.00 1674
9-Apr-17 66.00 45.00 0.00 2375