Anthony Wynne (ant358) · Plymouth, UK
ant358 / tweet_listener.py
Created January 5, 2019 17:33 — forked from hugobowne/tweet_listener.py
Here I define a Tweet listener that creates a file called 'tweets.txt', collects streaming tweets as JSON and writes them to that file; once 100 tweets have been streamed, the listener closes the file and stops listening.
import json
import tweepy

class MyStreamListener(tweepy.StreamListener):
    def __init__(self, api=None):
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0
        self.file = open("tweets.txt", "w")

    def on_status(self, status):
        tweet = status._json
        self.file.write(json.dumps(tweet) + '\n')
        self.num_tweets += 1
        # close the file and stop streaming once 100 tweets have been collected
        if self.num_tweets >= 100:
            self.file.close()
            return False
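As a rough usage sketch (assuming the pre-4.0 tweepy API that still provides StreamListener, with placeholder credentials and an invented keyword), the listener might be attached to a stream like this:

import tweepy

# placeholder credentials -- substitute your own app's keys
auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")

# attach the listener and start streaming tweets matching a keyword;
# streaming stops once on_status returns False after 100 tweets
listener = MyStreamListener()
stream = tweepy.Stream(auth, listener)
stream.filter(track=['python'])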
def first_n_pairs(dict_to_see, n):
    """Useful with large dictionaries to see what the data looks like."""
    a = {k: dict_to_see[k] for k in list(dict_to_see)[:n]}
    print(a)
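For illustration only (the dictionary below is invented, not from the gist), a call on a larger dict looks like:

data = {f'key_{i}': i ** 2 for i in range(1000)}
first_n_pairs(data, 3)
# prints {'key_0': 0, 'key_1': 1, 'key_2': 4}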
def replace_all_NaN(df):
    """If you are confident that missing numbers can be replaced with 0 and missing
    objects can be replaced by 'No_<columnname>', this function will do that over the
    whole data frame. More data types will be added as I come across them. It prints
    info() when finished so you can check it has captured them all."""
    for col in df:
        if df[col].dtype == 'object' and df[col].isna().sum() > 0:
            df[col] = df[col].fillna('No_' + col)
        elif df[col].dtype == 'float64' and df[col].isna().sum() > 0:
            df[col] = df[col].fillna(0.0)
    df.info()
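A quick check of the behaviour on a hypothetical toy DataFrame (column names invented for the example):

import numpy as np
import pandas as pd

df = pd.DataFrame({'city': ['Plymouth', np.nan, 'Exeter'],
                   'price': [1.5, np.nan, 3.0]})
replace_all_NaN(df)
# the missing city becomes 'No_city', the missing price becomes 0.0,
# and df.info() is printed as a final check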
import pandas as pd

def sig_num_columns(X_train, y_train, p_thres=0.05):
    """Which numerical features (columns) in the training data set are significantly
    correlated with the target? Returns a dataframe with each column name and its p-value.
    p_thres is set to 0.05 for a 95% confidence level; pass a new p_thres to change it.
    Only the significant columns are returned, and only pass numerical columns to the
    function! Other column types will raise a shape error."""
    from scipy.stats import linregress
    global sig_num
    sig_num = {}
    for col in X_train:
        slope, intercept, rvalue, pvalue, stderr = linregress(X_train[col], y_train)
        # keep only the columns whose linear fit against the target is significant
        if pvalue < p_thres:
            sig_num[col] = pvalue
    return pd.DataFrame(list(sig_num.items()), columns=['column', 'p_value'])
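A minimal, made-up example of calling it (synthetic data, and it assumes the completed version above that returns a dataframe):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X_train = pd.DataFrame({'size': rng.normal(size=100),
                        'noise': rng.normal(size=100)})
y_train = 2 * X_train['size'] + rng.normal(scale=0.1, size=100)

print(sig_num_columns(X_train, y_train))
# only 'size' should survive the 0.05 threshold; 'noise' is dropped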
def anova_machine(Cat_col, target_col, df):
    """ANOVA function. Provide a categorical column, the target variable column y and the
    main data set. A pivot table is produced, then an ANOVA is performed to see whether
    the category groups are significantly different from each other.
    Currently set for 95% confidence; will update later for higher significance settings."""
    from scipy.stats import f_oneway
    p_table = df.pivot(columns=Cat_col, values=target_col)
    total_columns = len(p_table.columns)
    total_rows = len(p_table)
    # one-way ANOVA across the category groups, ignoring missing values
    fstat, pvalue = f_oneway(*[p_table[col].dropna() for col in p_table.columns])
    print(f'{total_columns} groups over {total_rows} rows: F = {fstat:.3f}, p = {pvalue:.4f}')
    print('Significant at 95% confidence' if pvalue < 0.05 else 'Not significant at 95% confidence')
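An illustrative call on an invented DataFrame with one categorical column and one numeric target:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
df = pd.DataFrame({'group': ['A'] * 50 + ['B'] * 50,
                   'score': np.concatenate([rng.normal(0, 1, 50),
                                            rng.normal(1, 1, 50)])})
anova_machine('group', 'score', df)
# reports 2 groups over 100 rows; the A vs B mean difference should come out significant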
ant358 / truncate.py
Created January 6, 2022 15:21
Truncate numbers (force them to round down)
import math

def truncate(f, n=2):
    """Truncate f to n decimal places."""
    return math.trunc(f * 10 ** n) / 10 ** n
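A couple of quick examples of the behaviour (note that math.trunc rounds towards zero, so negative numbers are truncated upwards rather than down):

print(truncate(3.14159))    # 3.14
print(truncate(2.9999, 3))  # 2.999
print(truncate(-1.239))     # -1.23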
ant358 / install_jupyter_dash.py
Created October 6, 2022 21:01
Install the packages needed for Google Colab to run Jupyter Dash
# install required modules not in Colab by default
!pip install --quiet jupyter-dash dash_daq dash-bootstrap-components
ant358 / agile-dash-imports.py
Created October 6, 2022 21:04
Import the modules required to run a Jupyter Dash dashboard in Google Colab
# load python modules
import dash_daq as daq
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output
ant358 / agile-dash-proxy.py
Created October 6, 2022 21:06
Setup the Jupyter Dash proxy
# setup the proxy
JupyterDash.infer_jupyter_proxy_config()
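To show where these pieces fit together, here is a hedged sketch (the layout, title and port are my own choices, not from the gists) of building and running a JupyterDash app in Colab once the proxy is configured:

# assumes the install, imports and proxy setup from the snippets above have been run
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = html.Div([html.H1('Agile dashboard'),
                       dcc.Markdown('Gauge and callbacks go here')])

# 'external' prints a proxied URL to open from Colab; 'inline' renders in the notebook
app.run_server(mode='external', port=8050)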
ant358 / agile-dash-gauge.py
Created October 6, 2022 21:12
The Dash gauge for Jupyter Dash dashboard
# the gauge
gauge = [daq.Gauge(
    id='agile-gauge',
    # make a colour gradient on the scale
    color={"gradient": True,
           "ranges": {"green": [0, 33],
                      "yellow": [33, 66],
                      "red": [66, 100]}},
    # create a custom scale
    scale={"custom": {