sachinsdate

## auto_arima_py
import argparse
import numpy as np
import pmdarima as pm
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import t

def auto_arima(in_csv_file_path):
	"""Load the CSV at ``in_csv_file_path`` into a date-indexed DataFrame.

	Echoes the input path to stdout, then reads the file with pandas using
	the first row as the header and the first column as a parsed date index.

	Args:
		in_csv_file_path: Path of the CSV file to read. It is concatenated
			onto a string prefix for the echo, so it must be a ``str``.

	NOTE(review): only the loading step is visible in this excerpt; the
	frame is bound to a local name and nothing is returned here.
	"""
	print('IN File==>' + in_csv_file_path)
	# ``infer_datetime_format`` is deprecated since pandas 2.0 (a strict form
	# of format inference is now the default) and only triggers a warning,
	# so it is no longer passed.
	df = pd.read_csv(in_csv_file_path, header=0, parse_dates=[0], index_col=[0])

## tb.csv

          
            Year
            Number of Cases

            
              1974
              30122

            
              1975
              33989

            
              1976
              32105

            
              1977
              30145

            
              1978
              28521

            
              1979
              27669

            
              1980
              27749

            
              1981
              27373

            
              1982
              25520

## linear_regression.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Load the data set; the first row of the CSV supplies the column names.
df = pd.read_csv('uciml_auto_city_highway_mpg.csv', header=0)

# Plot the original data set as a scatter of 'City MPG' (x-axis) against
# 'Highway MPG' (y-axis).
df.plot.scatter(x='City MPG', y='Highway MPG')
# Display the figure.
plt.show()

## uciml_auto_city_highway_mpg.csv

          
            City MPG
            Highway MPG

            
              21
              27

            
              21
              27

            
              19
              26

            
              24
              30

            
              18
              22

            
              19
              25

            
              19
              25

            
              19
              25

            
              17
              20

## seasonal_time_series.py
import matplotlib.pyplot as plt
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import seaborn as sns


# Load the series: the first row is the header, and the first column is parsed
# as dates and used as the index. ``infer_datetime_format`` is deprecated since
# pandas 2.0 (format inference is now the default), so it is not passed.
df = pd.read_csv('boston_monthly_tmax_1998_2019.csv', header=0, parse_dates=[0], index_col=[0])
# Plot the frame against its date index, with a dot marker at each observation.
df.plot(marker='.')
plt.show()

## boston_monthly_tmax_1998_2019.csv

          
            Date
            Monthly Average Maximum

            
              1/15/1998
              39.71

            
              2/15/1998
              40.97

            
              3/15/1998
              48.75

            
              4/15/1998
              56.74

            
              5/15/1998
              68.75

            
              6/15/1998
              72

            
              7/15/1998
              82.62

            
              8/15/1998
              80.2

            
              9/15/1998
              74.44

## hand_cranked_pacf.py
import pandas as pd
from sklearn import linear_model


#Read the data into a pandas DataFrame, parsing the first column as dates and using it as the index.
#infer_datetime_format is deprecated since pandas 2.0 (format inference is now the default), so it is not passed.
df = pd.read_csv('southern_osc.csv', header=0, parse_dates=[0], index_col=[0])

#add two columns containing the LAG=1 and LAG=2 version of the data to the DataFrame
#(shift(k) moves the 'T_i' values down by k rows, so the first k rows of each new column are NaN)
df['T_(i-1)'] = df['T_i'].shift(1)
df['T_(i-2)'] = df['T_i'].shift(2)

## southern_osc.csv

          
            Date
            T_i

            
              Jan-51
              1.5

            
              Feb-51
              0.9

            
              Mar-51
              -0.1

            
              Apr-51
              -0.3

            
              May-51
              -0.7

            
              Jun-51
              0.2

            
              Jul-51
              -1

            
              Aug-51
              -0.2

            
              Sep-51
              -1.1

## poisson_sim.py
import random
import math

# Simulation parameters.
# NOTE(review): presumably the arrival rate of the process; the code that
# reads it is not visible in this excerpt, so confirm.
_lambda = 5
# Number of iterations of the loop that follows (it runs over range(_num_arrivals)).
_num_arrivals = 100
# NOTE(review): looks like a running arrival-time value starting at zero;
# confirm against the loop body, which is not visible here.
_arrival_time = 0

# Print a comma-separated header line before the loop starts.
print('RAND,INTER_ARRV_T,ARRV_T')

for i in range(_num_arrivals):

## nyc_bb_bicyclist_counts.csv

          
            Date
            HIGH_T
            LOW_T
            PRECIP
            BB_COUNT

            
              1-Apr-17
              46.00
              37.00
              0.00
              606

            
              2-Apr-17
              62.10
              41.00
              0.00
              2021

            
              3-Apr-17
              63.00
              50.00
              0.03
              2470

            
              4-Apr-17
              51.10
              46.00
              1.18
              723

            
              5-Apr-17
              63.00
              46.00
              0.00
              2807

            
              6-Apr-17
              48.90
              41.00
              0.73
              461

            
              7-Apr-17
              48.00
              43.00
              0.01
              1222

            
              8-Apr-17
              55.90
              39.90
              0.00
              1674

            
              9-Apr-17
              66.00
              45.00
              0.00
              2375
	import argparse
	import numpy as np
	import pmdarima as pm
	import pandas as pd
	import matplotlib.pyplot as plt
	from scipy.stats import t

	def auto_arima(in_csv_file_path):
	print('IN File==>' + in_csv_file_path)
	df = pd.read_csv(in_csv_file_path, header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
	Year	Number of Cases
	1974	30122
	1975	33989
	1976	32105
	1977	30145
	1978	28521
	1979	27669
	1980	27749
	1981	27373
	1982	25520
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LinearRegression
	import matplotlib.pyplot as plt

	df = pd.read_csv('uciml_auto_city_highway_mpg.csv', header=0)

	#Plot the original data set
	df.plot.scatter(x='City MPG', y='Highway MPG')
	plt.show()
	import matplotlib.pyplot as plt
	import pandas as pd
	from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
	import seaborn as sns


	df = pd.read_csv('boston_monthly_tmax_1998_2019.csv', header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
	df.plot(marker='.')
	plt.show()
	Date	Monthly Average Maximum
	1/15/1998	39.71
	2/15/1998	40.97
	3/15/1998	48.75
	4/15/1998	56.74
	5/15/1998	68.75
	6/15/1998	72
	7/15/1998	82.62
	8/15/1998	80.2
	9/15/1998	74.44
	import pandas as pd
	from sklearn import linear_model


	#Read the data into a pandas DataFrame
	df = pd.read_csv('southern_osc.csv', header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])

	#add two columns containing the LAG=1 and LAG=2 version of the data to the DataFrame
	df['T_(i-1)'] = df['T_i'].shift(1)
	df['T_(i-2)'] = df['T_i'].shift(2)
	Date	T_i
	Jan-51	1.5
	Feb-51	0.9
	Mar-51	-0.1
	Apr-51	-0.3
	May-51	-0.7
	Jun-51	0.2
	Jul-51	-1
	Aug-51	-0.2
	Sep-51	-1.1
	import random
	import math

	_lambda = 5
	_num_arrivals = 100
	_arrival_time = 0

	print('RAND,INTER_ARRV_T,ARRV_T')

	for i in range(_num_arrivals):
Date	HIGH_T	LOW_T	PRECIP	BB_COUNT
1-Apr-17	46.00	37.00	0.00	606
2-Apr-17	62.10	41.00	0.00	2021
3-Apr-17	63.00	50.00	0.03	2470
4-Apr-17	51.10	46.00	1.18	723
5-Apr-17	63.00	46.00	0.00	2807
6-Apr-17	48.90	41.00	0.73	461
7-Apr-17	48.00	43.00	0.01	1222
8-Apr-17	55.90	39.90	0.00	1674
9-Apr-17	66.00	45.00	0.00	2375