John DeJesus JohnDeJesus22

## kmeansgrouper backend function
# kmeans grouper dashboard
@bp.route('/kmeansdashboard/<email>', methods=['GET', 'POST'])
@login_required
def kmeansdashboard(email):
    """Load the upload of the user with this email and list its columns.

    :param email: email address taken from the URL, used to look up the user
    """
    # look up the user record by email (first_or_404 presumably aborts with
    # a 404 response when nothing matches -- confirm against the ORM in use)
    user = User.query.filter_by(email=email).first_or_404()
    data = user.upload

    # materialise the column labels of the upload as a plain list
    columns = list(data.columns)

## HgPMF
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import comb

def hypergeom_pmf(N, A, n, x):

    '''
    Probability Mass Function for Hypergeometric Distribution
    :param N: population size
    :param A: total number of desired items in N

## Hypergeometric CDF
def hypergeom_cdf(N, A, n, t, min_value=None):

    '''
    Cumulative Distribution Function for Hypergeometric Distribution
    :param N: population size
    :param A: total number of desired items in N
    :param n: number of draws made from N
    :param t: number of desired items in our draw of n items up to t
    :param min_value: optional, defaults to None; its role is not shown in
        this excerpt -- TODO confirm against the full implementation
    :returns: CDF computed up to t
    '''

## HGdistplot
def hypergeom_plot(N, A, n):

    '''
    Visualization of Hypergeometric Distribution for given parameters
    :param N: population size
    :param A: total number of desired items in N
    :param n: number of draws made from N
    :returns: Plot of Hypergeometric Distribution for given parameters
        (NOTE(review): the plotting code is not part of this excerpt, so
        whether the plot is displayed or returned as an object is
        unconfirmed)
    '''


## Pbc example with scipy
import pandas as pd
from scipy.stats import pointbiserialr

# get data
# Raw string literal: '\q' is not a recognised escape sequence, so the plain
# literal triggers an invalid-escape warning on current Python versions.
# The resulting path is exactly the same.
data = pd.read_csv(r'D:\quiz-Alg2U0Quiz-standard20180510.csv')

# get continuous and dichotomous data
grades = data['Percent Correct']
question_1 = data['Q1']

## PBCpandasScratch
def pbc_scratch(binary_data, continuous_data, data):
    """
    Function that computes the point biserial correlation of two pandas data frame columns
    :param binary_data: name of dichotomous data column
    :param continuous_data: name of continuous data column
    :param data: dataframe where above columns come from
    :returns: Point Biserial Correlation
    """

    # distinct values present in the dichotomous column
    bd_unique = data[binary_data].unique()

## covid_ll_libraries
# import libraries

import pandas as pd
from googlesearch import search
from newspaper import Article, Config
from newspaper.article import ArticleException, ArticleDownloadState
import matplotlib.pyplot as plt
import seaborn as sns
from nltk import ne_chunk, pos_tag, word_tokenize
import torch

## google_search
# Query Google for the search phrase and collect every returned URL in a list
search_text = 'covid learning loss'
search_results = list(
    search(search_text, tld='com', lang='en', num=78, start=0, stop=78, pause=2.0)
)

## newspaper_extract
# set configuration to not capture images
config = Config()
config.fetch_images = False

# build one Article per search result, handing each the shared config;
# without config=config the articles fall back to newspaper's default
# configuration and the fetch_images setting above is never applied
articles = [Article(url, config=config) for url in search_results]

# download the articles (download() is called for its side effect; the
# list simply records what each call returned)
articles_downloaded = [article.download() for article in articles]

## store_in_dataframe
# initiate dataframe for text data
text_data = pd.DataFrame()

# Create columns for text data; columns are added one at a time to the
# initially empty frame, with one title/authors entry per article
text_data['title'] = [article.title for article in articles]
text_data['authors'] = [article.authors for article in articles]
# NOTE(review): article_text is not defined in the visible code -- it has
# to be created elsewhere before this line runs; confirm
text_data['text'] = article_text
text_data['links'] = search_results
	# kmeans grouper dashboard
	@bp.route('/kmeansdashboard/<email>', methods=['GET', 'POST'])
	@login_required
	def kmeansdashboard(email):
		"""Load the upload of the user with this email and list its columns.

		:param email: email address taken from the URL, used to look up the user
		"""
		# look up the user record by email (first_or_404 presumably aborts with
		# a 404 response when nothing matches -- confirm against the ORM in use)
		user = User.query.filter_by(email=email).first_or_404()
		data = user.upload

		# materialise the column labels of the upload as a plain list
		columns = list(data.columns)
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy.special import comb

	def hypergeom_pmf(N, A, n, x):

	'''
	Probability Mass Function for Hypergeometric Distribution
	:param N: population size
	:param A: total number of desired items in N
	def hypergeom_cdf(N, A, n, t, min_value=None):

	'''
	Cumulative Distribution Function for Hypergeometric Distribution
	:param N: population size
	:param A: total number of desired items in N
	:param n: number of draws made from N
	:param t: number of desired items in our draw of n items up to t
	:param min_value: optional, defaults to None; its role is not shown in
	this excerpt -- TODO confirm against the full implementation
	:returns: CDF computed up to t
	'''
	def hypergeom_plot(N, A, n):

	'''
	Visualization of Hypergeometric Distribution for given parameters
	:param N: population size
	:param A: total number of desired items in N
	:param n: number of draws made from N
	:returns: Plot of Hypergeometric Distribution for given parameters
	(NOTE(review): the plotting code is not part of this excerpt, so
	whether the plot is displayed or returned as an object is unconfirmed)
	'''
	import pandas as pd
	from scipy.stats import pointbiserialr

	# get data
	# Raw string literal: '\q' is not a recognised escape sequence, so the plain
	# literal triggers an invalid-escape warning on current Python versions.
	# The resulting path is exactly the same.
	data = pd.read_csv(r'D:\quiz-Alg2U0Quiz-standard20180510.csv')

	# get continuous and dichotomous data
	grades = data['Percent Correct']
	question_1 = data['Q1']
	def pbc_scratch(binary_data, continuous_data, data):
	"""
	Function that computes the point biserial correlation of two pandas data frame columns
	:param binary_data: name of dichotomous data column
	:param continuous_data: name of continuous data column
	:param data: dataframe where above columns come from
	:returns: Point Biserial Correlation
	"""

	# distinct values present in the dichotomous column
	bd_unique = data[binary_data].unique()
	# import libraries

	import pandas as pd
	from googlesearch import search
	from newspaper import Article, Config
	from newspaper.article import ArticleException, ArticleDownloadState
	import matplotlib.pyplot as plt
	import seaborn as sns
	from nltk import ne_chunk, pos_tag, word_tokenize
	import torch
	# Query Google for the search phrase and collect every returned URL in a list
	search_text = 'covid learning loss'
	search_results = list(
		search(search_text, tld='com', lang='en', num=78, start=0, stop=78, pause=2.0)
	)
	# set configuration to not capture images
	config = Config()
	config.fetch_images = False

	# build one Article per search result, handing each the shared config;
	# without config=config the articles fall back to newspaper's default
	# configuration and the fetch_images setting above is never applied
	articles = [Article(url, config=config) for url in search_results]

	# download the articles (download() is called for its side effect; the
	# list simply records what each call returned)
	articles_downloaded = [article.download() for article in articles]
	# initiate dataframe for text data
	text_data = pd.DataFrame()

	# Create columns for text data; columns are added one at a time to the
	# initially empty frame, with one title/authors entry per article
	text_data['title'] = [article.title for article in articles]
	text_data['authors'] = [article.authors for article in articles]
	# NOTE(review): article_text is not defined in the visible code -- it has
	# to be created elsewhere before this line runs; confirm
	text_data['text'] = article_text
	text_data['links'] = search_results