Anthony Wynne (ant358) · Plymouth, UK
ant358 / tweet_listener.py
Created January 5, 2019 17:33 — forked from hugobowne/tweet_listener.py
Here I define a Tweet listener that creates a file called 'tweets.txt', collects streaming tweets as JSON and writes them to that file; once 100 tweets have been streamed, the listener closes the file and stops listening.
import json
import tweepy

class MyStreamListener(tweepy.StreamListener):
    def __init__(self, api=None):
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0
        self.file = open("tweets.txt", "w")

    def on_status(self, status):
        tweet = status._json
        self.file.write(json.dumps(tweet) + '\n')
        self.num_tweets += 1
        # close the file and stop streaming once 100 tweets have been collected
        if self.num_tweets >= 100:
            self.file.close()
            return False
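As a rough usage sketch (assuming the pre-4.0 tweepy API that still provides StreamListener, with placeholder credentials and an invented keyword), the listener might be attached to a stream like this:

import tweepy

# placeholder credentials -- substitute your own app's keys
auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")

# attach the listener and start streaming tweets matching a keyword;
# streaming stops once on_status returns False after 100 tweets
listener = MyStreamListener()
stream = tweepy.Stream(auth, listener)
stream.filter(track=['python'])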
def first_n_pairs(dict_to_see, n):
    """Useful with large dictionaries to see what the data looks like."""
    a = {k: dict_to_see[k] for k in list(dict_to_see)[:n]}
    print(a)
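For illustration only (the dictionary below is invented, not from the gist), a call on a larger dict looks like:

data = {f'key_{i}': i ** 2 for i in range(1000)}
first_n_pairs(data, 3)
# prints {'key_0': 0, 'key_1': 1, 'key_2': 4}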
def replace_all_NaN(df):
    """If you are confident that missing numbers can be replaced with 0 and missing
    objects can be replaced by 'No_<columnname>', this function will do that over the
    whole data frame. More data types will be added as I come across them. It prints
    info() when finished so you can check it has captured them all."""
    for col in df:
        if df[col].dtype == 'object' and df[col].isna().sum() > 0:
            df[col] = df[col].fillna('No_' + col)
        elif df[col].dtype == 'float64' and df[col].isna().sum() > 0:
            df[col] = df[col].fillna(0.0)
    df.info()
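A quick check of the behaviour on a hypothetical toy DataFrame (column names invented for the example):

import numpy as np
import pandas as pd

df = pd.DataFrame({'city': ['Plymouth', np.nan, 'Exeter'],
                   'price': [1.5, np.nan, 3.0]})
replace_all_NaN(df)
# the missing city becomes 'No_city', the missing price becomes 0.0,
# and df.info() is printed as a final check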
import pandas as pd

def sig_num_columns(X_train, y_train, p_thres=0.05):
    """Which numerical features (columns) in the training data set are significantly
    correlated with the target? Returns a dataframe with each column name and its p-value.
    p_thres is set to 0.05 for a 95% confidence level; pass a new p_thres to change it.
    Only the significant columns are returned, and only pass numerical columns to the
    function! Other column types will raise a shape error."""
    from scipy.stats import linregress
    global sig_num
    sig_num = {}
    for col in X_train:
        slope, intercept, rvalue, pvalue, stderr = linregress(X_train[col], y_train)
        # keep only the columns whose linear fit against the target is significant
        if pvalue < p_thres:
            sig_num[col] = pvalue
    return pd.DataFrame(list(sig_num.items()), columns=['column', 'p_value'])
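A minimal, made-up example of calling it (synthetic data, and it assumes the completed version above that returns a dataframe):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X_train = pd.DataFrame({'size': rng.normal(size=100),
                        'noise': rng.normal(size=100)})
y_train = 2 * X_train['size'] + rng.normal(scale=0.1, size=100)

print(sig_num_columns(X_train, y_train))
# only 'size' should survive the 0.05 threshold; 'noise' is dropped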
def anova_machine(Cat_col, target_col, df):
    """ANOVA function. Provide a categorical column, the target variable column y and the
    main data set. A pivot table is produced, then an ANOVA is performed to see whether
    the category groups are significantly different from each other.
    Currently set for 95% confidence; will update later for higher significance settings."""
    from scipy.stats import f_oneway
    p_table = df.pivot(columns=Cat_col, values=target_col)
    total_columns = len(p_table.columns)
    total_rows = len(p_table)
    # one-way ANOVA across the category groups, ignoring missing values
    fstat, pvalue = f_oneway(*[p_table[col].dropna() for col in p_table.columns])
    print(f'{total_columns} groups over {total_rows} rows: F = {fstat:.3f}, p = {pvalue:.4f}')
    print('Significant at 95% confidence' if pvalue < 0.05 else 'Not significant at 95% confidence')
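An illustrative call on an invented DataFrame with one categorical column and one numeric target:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
df = pd.DataFrame({'group': ['A'] * 50 + ['B'] * 50,
                   'score': np.concatenate([rng.normal(0, 1, 50),
                                            rng.normal(1, 1, 50)])})
anova_machine('group', 'score', df)
# reports 2 groups over 100 rows; the A vs B mean difference should come out significant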
ant358 / truncate.py
Created January 6, 2022 15:21
Truncate numbers (force them to round down)
import math

def truncate(f, n=2):
    """Truncate f to n decimal places."""
    return math.trunc(f * 10 ** n) / 10 ** n
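A couple of quick examples of the behaviour (note that math.trunc rounds towards zero, so negative numbers are truncated upwards rather than down):

print(truncate(3.14159))    # 3.14
print(truncate(2.9999, 3))  # 2.999
print(truncate(-1.239))     # -1.23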
ant358 / install_jupyter_dash.py
Created October 6, 2022 21:01
Install the packages needed for Google Colab to run Jupyter Dash
# install required modules not in Colab by default
!pip install --quiet jupyter-dash dash_daq dash-bootstrap-components
ant358 / agile-dash-imports.py
Created October 6, 2022 21:04
Import the modules required to run a Jupyter Dash dashboard in Google Colab
# load python modules
import dash_daq as daq
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output
ant358 / agile-dash-proxy.py
Created October 6, 2022 21:06
Setup the Jupyter Dash proxy
# setup the proxy
JupyterDash.infer_jupyter_proxy_config()
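To show where these pieces fit together, here is a hedged sketch (the layout, title and port are my own choices, not from the gists) of building and running a JupyterDash app in Colab once the proxy is configured:

# assumes the install, imports and proxy setup from the snippets above have been run
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = html.Div([html.H1('Agile dashboard'),
                       dcc.Markdown('Gauge and callbacks go here')])

# 'external' prints a proxied URL to open from Colab; 'inline' renders in the notebook
app.run_server(mode='external', port=8050)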
ant358 / agile-dash-gauge.py
Created October 6, 2022 21:12
The Dash gauge for Jupyter Dash dashboard
# the gauge
gauge = [daq.Gauge(
    id='agile-gauge',
    # make a colour gradient on the scale
    color={"gradient": True,
           "ranges": {"green": [0, 33],
                      "yellow": [33, 66],
                      "red": [66, 100]}},
    # create a custom scale
    scale={"custom": {