Skip to content

Instantly share code, notes, and snippets.

@JohnDeJesus22
JohnDeJesus22 / kmeansgrouper backend function
Created October 22, 2019 14:22
Backend function that generates the "create features" web page
# kmeans grouper dashboard: view registered on `bp` for GET and POST requests
@bp.route('/kmeansdashboard/<email>', methods=['GET', 'POST'])
@login_required
def kmeansdashboard(email):
# get user data from database, matching on the email taken from the URL
# NOTE(review): first_or_404() presumably aborts with HTTP 404 when no user matches -- confirm
user = User.query.filter_by(email=email).first_or_404()
# NOTE(review): `upload` is assumed to be a pandas DataFrame (it exposes .columns) -- confirm
data = user.upload
# get columns: unpack the column labels into a plain list
columns = [*data.columns]
@JohnDeJesus22
JohnDeJesus22 / HgPMF
Created October 28, 2019 01:40
Hypergeometric PMF
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import comb
def hypergeom_pmf(N, A, n, x):
'''
Probability Mass Function for Hypergeometric Distribution
:param N: population size
:param A: total number of desired items in N
def hypergeom_cdf(N, A, n, t, min_value=None):
'''
Cumulative Distribution Function for the Hypergeometric Distribution
:param N: population size
:param A: total number of desired items in N
:param n: number of draws made from N
:param t: number of desired items in our draw of n items up to t
:returns: CDF computed up to t
'''
def hypergeom_plot(N, A, n):
'''
Visualization of Hypergeometric Distribution for given parameters
:param N: population size
:param A: total number of desired items in N
:param n: number of draws made from N
:returns: Plot of Hypergeometric Distribution for given parameters
'''
@JohnDeJesus22
JohnDeJesus22 / Pbc example with scipy
Created November 26, 2019 19:25
PBC applied to quiz data
import pandas as pd
from scipy.stats import pointbiserialr
# get data
# Raw string literal: the Windows path contains a backslash, and '\q' is an
# invalid escape sequence (DeprecationWarning since Python 3.6, SyntaxWarning
# from 3.12). The resulting path text is unchanged.
data = pd.read_csv(r'D:\quiz-Alg2U0Quiz-standard20180510.csv')
# get continuous and dichotomous data
grades = data['Percent Correct']  # continuous variable
question_1 = data['Q1']  # dichotomous variable
@JohnDeJesus22
JohnDeJesus22 / PBCpandasScratch
Created November 26, 2019 19:35
PBC from scratch using pandas and numpy
def pbc_scratch(binary_data, continuous_data, data):
"""
Function that computes the point biserial correlation of two pandas data frame columns
:param binary_data: name of dichotomous data column
:param continuous_data: name of continuous data column
:param data: dataframe where above columns come from
:returns: Point Biserial Correlation
"""
# distinct values present in the dichotomous column
bd_unique = data[binary_data].unique()
@JohnDeJesus22
JohnDeJesus22 / covid_ll_libraries
Last active June 3, 2020 02:36
Libraries imported for covid learning loss research
# import libraries
import pandas as pd
from googlesearch import search
from newspaper import Article, Config
from newspaper.article import ArticleException, ArticleDownloadState
import matplotlib.pyplot as plt
import seaborn as sns
from nltk import ne_chunk, pos_tag, word_tokenize
import torch
@JohnDeJesus22
JohnDeJesus22 / google_search
Created June 3, 2020 01:49
Gather_google_links
@JohnDeJesus22
JohnDeJesus22 / newspaper_extract
Created June 3, 2020 01:53
article_download_newspaper_library
# set configuration to not capture images
config = Config()
config.fetch_images = False
# build one Article per link and hand each the shared config; without
# config=config every Article uses its own default configuration and the
# fetch_images setting above is never applied
articles = [Article(url, config=config) for url in search_results]
# download the articles: download() is called for its effect on each Article;
# the list of its return values is kept under the original name so later code
# that refers to `articles_downloaded` still works
articles_downloaded = [article.download() for article in articles]
@JohnDeJesus22
JohnDeJesus22 / store_in_dataframe
Created June 3, 2020 01:54
convert text to pandas dataframe
# Build the text data frame: start from an empty frame and attach all four
# columns in a single assign() call; columns are added in the order written.
text_data = pd.DataFrame().assign(
    title=[doc.title for doc in articles],
    authors=[doc.authors for doc in articles],
    text=article_text,
    links=search_results,
)