This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# kmeans grouper dashboard | |
@bp.route('/kmeansdashboard/<email>', methods=['GET', 'POST']) | |
@login_required | |
def kmeansdashboard(email): | |
# get user data from database | |
user = User.query.filter_by(email=email).first_or_404() | |
data = user.upload | |
# get columns | |
columns = [*data.columns] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy.special import comb | |
def hypergeom_pmf(N, A, n, x): | |
''' | |
Probability Mass Function for Hypergeometric Distribution | |
:param N: population size | |
:param A: total number of desired items in N |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def hypergeom_cdf(N, A, n, t, min_value=None): | |
''' | |
Cumulative Distribution Function for Hypergeometric Distribution | |
:param N: population size | |
:param A: total number of desired items in N | |
:param n: number of draws made from N | |
:param t: number of desired items in our draw of n items up to t | |
:returns: CDF computed up to t | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def hypergeom_plot(N, A, n): | |
''' | |
Visualization of Hypergeometric Distribution for given parameters | |
:param N: population size | |
:param A: total number of desired items in N | |
:param n: number of draws made from N | |
:returns: Plot of Hypergeometric Distribution for given parameters | |
''' | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from scipy.stats import pointbiserialr | |
# Load the quiz export. The Windows path is written as a raw string so the
# backslash is taken literally instead of starting an invalid "\q" escape.
data = pd.read_csv(r'D:\quiz-Alg2U0Quiz-standard20180510.csv')
# Pull out the continuous variable (overall percent correct) and the
# dichotomous variable (the Q1 column) used for the correlation.
grades = data['Percent Correct']
question_1 = data['Q1']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pbc_scratch(binary_data, continuous_data, data): | |
""" | |
Function that computes the point biserial correlation of two pandas data frame columns | |
:param binary_data: name of dichotomous data column | |
:param continuous_data: name of continuous data column | |
:param data: dataframe where above columns come from | |
:returns: Point Biserial Correlation | |
""" | |
bd_unique = data[binary_data].unique() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libraries | |
import pandas as pd | |
from googlesearch import search | |
from newspaper import Article, Config | |
from newspaper.article import ArticleException, ArticleDownloadState | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from nltk import ne_chunk, pos_tag, word_tokenize | |
import torch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the Google search and materialise the yielded URLs as a list.
# pause=2.0 spaces out the HTTP requests; num and stop are both 78 here
# (presumably page size and last result index — confirm against the
# googlesearch documentation).
search_text = 'covid learning loss'
search_results = list(
    search(search_text, tld='com', lang='en', num=78, start=0, stop=78, pause=2.0)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a newspaper configuration that does not fetch images.
config = Config()
config.fetch_images = False
# Wrap every search hit in an Article. The configuration must be passed to
# each Article explicitly; otherwise the fetch_images setting above is never
# applied to any of them.
articles = [Article(url, config=config) for url in search_results]
# Download each article. download() presumably works in place on the Article
# and returns None, so this list carries no useful values; the name is kept
# because code outside this view may still reference it.
articles_downloaded = [article.download() for article in articles]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gather the values for every column up front, in the order the columns
# should appear in the frame.
text_columns = {
    'title': [article.title for article in articles],
    'authors': [article.authors for article in articles],
    'text': article_text,
    'links': search_results,
}
# Start from an empty frame and attach the columns one at a time.
text_data = pd.DataFrame()
for col_name, col_values in text_columns.items():
    text_data[col_name] = col_values