Germán Martínez GermanCM

## china_electric_vehicles_sales.csv

          
            Year/Month
            BEV
            sales

            
              2011/01
              588
              310

            
              2011/02
              47
              417

            
              2011/03
              159
              286

            
              2011/04
              173
              162

            
              2011/05
              305
              470

            
              2011/06
              319
              273

            
              2011/07
              535
              650

            
              2011/08
              917
              1323

            
              2011/09
              57
              688

## numeric_Shapiro-Wilk_normality_test.py
from numpy.random import seed
from numpy.random import randn
from scipy.stats import shapiro

# 24.5.2 Shapiro-Wilk Test
# The Shapiro-Wilk test evaluates a data sample and quantifies how likely it
# is that the data was drawn from a Gaussian distribution.
#
# NOTE(review): `sample_singles` is not defined in this snippet; it is assumed
# to be the data sample under test, created elsewhere -- confirm.

# normality test: unpack the two values returned by shapiro()
# (test statistic and p-value)
stat, p = shapiro(sample_singles)

## visual_normality_check.py
# `normal` is called below, so it has to be imported here.
from numpy.random import normal
from statsmodels.graphics.gofplots import qqplot
from matplotlib import pyplot

# q-q plot
# Reference plot: 1000 draws from numpy's default normal distribution.
sample_normal = normal(size=1000)
qqplot(sample_normal, line='s')
# NOTE(review): `sample` is not defined in this snippet; it is assumed to be
# the data whose normality is being checked -- confirm.
qqplot(sample, line='s')
pyplot.show()

## fill_na_on_column.py
import numpy as np

# Keep only the `renta` values that are not NaN.
not_nan_mask = np.logical_not(
    np.isnan(np.array(bank_product_rec_santander_data.renta.values))
)
renta_values_not_nan = bank_product_rec_santander_data.renta.values[not_nan_mask]

# Mean of the non-missing values, rounded to two decimals, used as the fill value.
# NOTE(review): the mean comes from `bank_product_rec_santander_data` but is
# applied to `bank_product_rec_santander_sub_df` -- confirm this is intended.
sub_renta_mean = np.mean(renta_values_not_nan).round(2)
bank_product_rec_santander_sub_df['renta'] = (
    bank_product_rec_santander_sub_df['renta'].fillna(sub_renta_mean)
)

## plotly_line_chart.py
import plotly.graph_objects as go

# Single scatter trace: the data set's index on x, `dependent_variable` on y.
fig = go.Figure(
    data=go.Scatter(
        x=dataset.index,
        y=dataset.dependent_variable,
        mode='lines+markers+text',
    )
)
fig.show()

## get_all_combinations.py
def makeCartesianProduct(array_x, array_y, array_z):
    """Build a DataFrame holding every (x, y, z) combination of the inputs.

    Each argument is flattened with ``.reshape(-1)``, so it must provide that
    method (e.g. a NumPy array).

    Returns:
        A pandas DataFrame with columns ``'x'``, ``'y'`` and ``'z'`` and one
        row per element of the Cartesian product, in ``itertools.product``
        order.
    """
    import itertools
    import pandas as pd

    flat_axes = [axis.reshape(-1) for axis in (array_x, array_y, array_z)]
    triples = itertools.product(*flat_axes)
    return pd.DataFrame.from_records(triples, columns=['x', 'y', 'z'])

# All combinations of the distinct values found in columns A, B and C of jobs_df.
combinations_cartesian_prod = makeCartesianProduct(
    jobs_df.A.unique(),
    jobs_df.B.unique(),
    jobs_df.C.unique(),
)

## keras_tuner_bayes_opt_timeSeries.py
  # split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split a univariate dataset into train/test sets.

    The last ``n_test`` observations form the test set and everything before
    them forms the train set.

    Args:
        data: Any sliceable sequence (list, array, ...).
        n_test: Number of trailing observations to hold out; coerced with
            ``int()``.

    Returns:
        A ``(train, test)`` tuple of slices of ``data``.
    """
    n_test = int(n_test)
    if n_test == 0:
        # -0 == 0, so data[:-0] would be empty and data[-0:] the whole
        # sequence: the opposite of "hold out nothing".
        return data[:], data[:0]
    return data[:-n_test], data[-n_test:]

# transform list into supervised learning format
def series_to_supervised(data, n_in, n_out=1):
    """Begin converting ``data`` into a supervised-learning layout.

    NOTE(review): only the setup is present here. ``n_in``, ``n_out`` and
    ``cols`` are never used and nothing is returned, so the rest of the body
    appears to be missing -- restore it from the original source before use.
    """
    import pandas as pd

    df = pd.DataFrame(data)  # wrap the input in a DataFrame
    cols = list()  # starts empty; no visible line fills it

## bootstrap_CI_linear_reg.py
# bootstrap confidence intervals
import numpy as np
from numpy.random import seed, rand, randint, randn
# NOTE: `std` lives in numpy itself; numpy.random does not provide it.
from numpy import mean, median, percentile, std
from scipy.stats import linregress

# seed random number generator so the generated data is reproducible
seed(1)
# prepare data: 1000 standard-normal draws scaled by 20 and shifted by 100
x = 20 * randn(1000) + 100

## gist:2451b420b8f8313f92830bfc4b98bfa8

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                GermanCM
                / gist:2451b420b8f8313f92830bfc4b98bfa8
            
            
              Created
              March 26, 2020 16:56
                — forked from rxaviers/gist:7360908
            
              
                Complete list of github markdown emoji markup
              
          
    People


 :bowtie:
😄 :smile:
😆 :laughing:


😊 :blush:
😃 :smiley:
☺️ :relaxed:


😏 :smirk:
😍 :heart_eyes:
😘 :kissing_heart:


😚 :kissing_closed_eyes:
😳 :flushed:
😌 :relieved:


😆 :satisfied:
😁 :grin:
😉 :wink:


😜 :stuck_out_tongue_winking_eye:
😝 :stuck_out_tongue_closed_eyes:
😀 :grinning:


😗 :kissing:
😙 :kissing_smiling_eyes:
😛 :stuck_out_tongue:


## time_series_imputer.py
#source: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.interpolate.html
def imputeMissingInterpolating(array_df):
    """Fill missing values column by column using linear interpolation.

    Every column of ``array_df`` is interpolated with ``method='linear'`` and
    ``limit_direction='both'``, then written back into ``array_df`` (the
    frame is modified in place and also returned).

    Raises:
        AssertionError: if a column still contains NaN after interpolation.
    """
    import numpy as np

    for column_name in array_df.columns:
        filled = array_df[column_name].interpolate(method='linear', limit_direction='both')

        # Sanity check: nothing may be left missing in this column.
        still_missing = filled[np.isnan(filled)]
        assert len(still_missing) == 0

        array_df[column_name] = filled

    return array_df
Year/Month	BEV	sales
2011/01	588	310
2011/02	47	417
2011/03	159	286
2011/04	173	162
2011/05	305	470
2011/06	319	273
2011/07	535	650
2011/08	917	1323
2011/09	57	688
	from numpy.random import seed
	from numpy.random import randn
	from scipy.stats import shapiro

	# 24.5.2 Shapiro-Wilk Test
	# The Shapiro-Wilk test evaluates a data sample and quantifies how likely it
	# is that the data was drawn from a Gaussian distribution.
	#
	# NOTE(review): `sample_singles` is not defined in this snippet; it is assumed
	# to be the data sample under test, created elsewhere -- confirm.

	# normality test: unpack the two values returned by shapiro()
	# (test statistic and p-value)
	stat, p = shapiro(sample_singles)
	# `normal` is called below, so it has to be imported here.
	from numpy.random import normal
	from statsmodels.graphics.gofplots import qqplot
	from matplotlib import pyplot

	# q-q plot
	# Reference plot: 1000 draws from numpy's default normal distribution.
	sample_normal = normal(size=1000)
	qqplot(sample_normal, line='s')
	# NOTE(review): `sample` is not defined in this snippet; it is assumed to be
	# the data whose normality is being checked -- confirm.
	qqplot(sample, line='s')
	pyplot.show()
	import numpy as np

	# Keep only the `renta` values that are not NaN.
	not_nan_mask = np.logical_not(
	    np.isnan(np.array(bank_product_rec_santander_data.renta.values))
	)
	renta_values_not_nan = bank_product_rec_santander_data.renta.values[not_nan_mask]

	# Mean of the non-missing values, rounded to two decimals, used as the fill value.
	# NOTE(review): the mean comes from `bank_product_rec_santander_data` but is
	# applied to `bank_product_rec_santander_sub_df` -- confirm this is intended.
	sub_renta_mean = np.mean(renta_values_not_nan).round(2)
	bank_product_rec_santander_sub_df['renta'] = (
	    bank_product_rec_santander_sub_df['renta'].fillna(sub_renta_mean)
	)
	import plotly.graph_objects as go

	# Single scatter trace: the data set's index on x, `dependent_variable` on y.
	fig = go.Figure(
	    data=go.Scatter(
	        x=dataset.index,
	        y=dataset.dependent_variable,
	        mode='lines+markers+text',
	    )
	)
	fig.show()
	def makeCartesianProduct(array_x, array_y, array_z):
	    """Build a DataFrame holding every (x, y, z) combination of the inputs.

	    Each argument is flattened with ``.reshape(-1, )``, so it must provide
	    that method (e.g. a NumPy array).

	    Returns:
	        A pandas DataFrame with columns ``'x'``, ``'y'`` and ``'z'`` and one
	        row per element of the Cartesian product, in ``itertools.product``
	        order.
	    """
	    import pandas as pd
	    import itertools

	    return pd.DataFrame.from_records(
	        itertools.product(array_x.reshape(-1, ), array_y.reshape(-1, ), array_z.reshape(-1, )),
	        columns=['x', 'y', 'z'])

	# All combinations of the distinct values found in columns A, B and C of jobs_df.
	combinations_cartesian_prod = makeCartesianProduct(
	    jobs_df.A.unique(),
	    jobs_df.B.unique(),
	    jobs_df.C.unique(),
	)
	# split a univariate dataset into train/test sets
	def train_test_split(data, n_test):
	    """Split a univariate dataset into train/test sets.

	    The last ``n_test`` observations form the test set and everything before
	    them forms the train set.

	    Args:
	        data: Any sliceable sequence (list, array, ...).
	        n_test: Number of trailing observations to hold out; coerced with
	            ``int()``.

	    Returns:
	        A ``(train, test)`` tuple of slices of ``data``.
	    """
	    n_test = int(n_test)
	    if n_test == 0:
	        # -0 == 0, so data[:-0] would be empty and data[-0:] the whole
	        # sequence: the opposite of "hold out nothing".
	        return data[:], data[:0]
	    return data[:-n_test], data[-n_test:]

	# transform list into supervised learning format
	def series_to_supervised(data, n_in, n_out=1):
	    """Begin converting ``data`` into a supervised-learning layout.

	    NOTE(review): only the setup is present here. ``n_in``, ``n_out`` and
	    ``cols`` are never used and nothing is returned, so the rest of the body
	    appears to be missing -- restore it from the original source before use.
	    """
	    import pandas as pd

	    df = pd.DataFrame(data)  # wrap the input in a DataFrame
	    cols = list()  # starts empty; no visible line fills it
	# bootstrap confidence intervals
	import numpy as np
	from numpy.random import seed, rand, randint, randn
	# NOTE: `std` lives in numpy itself; numpy.random does not provide it.
	from numpy import mean, median, percentile, std
	from scipy.stats import linregress

	# seed random number generator so the generated data is reproducible
	seed(1)
	# prepare data: 1000 standard-normal draws scaled by 20 and shifted by 100
	x = 20 * randn(1000) + 100
`:bowtie:`	😄 `:smile:`	😆 `:laughing:`
😊 `:blush:`	😃 `:smiley:`	☺️ `:relaxed:`
😏 `:smirk:`	😍 `:heart_eyes:`	😘 `:kissing_heart:`
😚 `:kissing_closed_eyes:`	😳 `:flushed:`	😌 `:relieved:`
😆 `:satisfied:`	😁 `:grin:`	😉 `:wink:`
😜 `:stuck_out_tongue_winking_eye:`	😝 `:stuck_out_tongue_closed_eyes:`	😀 `:grinning:`
😗 `:kissing:`	😙 `:kissing_smiling_eyes:`	😛 `:stuck_out_tongue:`
	#source: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.interpolate.html
	def imputeMissingInterpolating(array_df):
	    """Fill missing values column by column using linear interpolation.

	    Every column of ``array_df`` is interpolated with ``method='linear'`` and
	    ``limit_direction='both'``, then written back into ``array_df`` (the
	    frame is modified in place and also returned).

	    Raises:
	        AssertionError: if a column still contains NaN after interpolation.
	    """
	    import numpy as np
	    for attr in array_df.columns:
	        attribute_interpolated = array_df[attr].interpolate(method='linear', limit_direction='both')
	        # Sanity check: nothing may be left missing in this column.
	        assert len(attribute_interpolated[np.isnan(attribute_interpolated)]) == 0

	        array_df[attr] = attribute_interpolated

	    return array_df