# TheBubblePopped/kmin.py

## kmin.py
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import pandas as pd
from sklearn import preprocessing as p
import plotly.graph_objects as go

# Widen pandas' console rendering so the many nutrient columns print untruncated.
pd.set_option('display.max_columns', 80)
pd.set_option('display.max_rows', 300)
pd.set_option('display.width', 1000)


from matplotlib import colors
# NOTE: the import above is shadowed immediately; from here on `colors` is the
# cluster-index -> hex colour palette used by make_charts.
colors = {0:'#1b9e77', 1:'#d95f02', 2:'#7570b3', 3:'#e7298a', 4:'#66a61e', 5:'#e6ab02', 6:'#a6761d', 7:'#666666'}

df = pd.read_excel('USDA_Food_Database.xlsx')

# Drop incomplete rows ONCE, before the descriptive columns are split off, so
# that `temp`/`temp2` (which keep No/Name/Keyword) and the numeric `df` always
# contain exactly the same rows. make_charts attaches cluster labels computed
# from the numeric rows onto `temp` by position, which needs identical row sets.
df.dropna(inplace=True)
temp = df.copy()   # early copy that still carries the Keyword and Name columns
temp2 = df.copy()
df.drop(columns=['No', 'Name', 'Keyword'], inplace=True)
cols = df.columns


# MinMaxScaler returns a bare ndarray, so the column names are re-attached.
puh2 = p.MinMaxScaler().fit_transform(df)
newdf2 = pd.DataFrame(puh2, columns=cols)


# Only these two scaled features are used for clustering.
newdf2 = newdf2.loc[:, ['Water_(g)', 'Sugar_Tot_(g)']].dropna()

def fit_kmeans(data, centers):
    """Cluster ``data`` with k-means and return one cluster index per row.

    Args:
        data: Feature table handed unchanged to ``KMeans.fit_predict``.
        centers: Number of clusters, passed as the first positional argument
            of ``KMeans``.

    Returns:
        The label array produced by ``fit_predict``.
    """
    return KMeans(centers).fit_predict(data)


def _bar_chart_layout(num):
    """Return (rows, cols, figsize, x_labelsize, y_labelsize) for ``num`` clusters.

    Returns None when no grid is defined (``num`` >= 10), in which case no
    bar charts are drawn.
    """
    if num < 4:
        return num, 1, (14, 16), 30, 20
    if num == 4:
        return 2, 2, (24, 14), 30, 20
    if num < 7:
        return 3, 2, (24, 14), 24, 18
    if num < 10:
        return 3, 3, (34, 18), 24, 19
    return None


def make_charts(num, tempfunc=temp.copy()):
    """Cluster the foods into ``num`` groups and plot the result.

    Two figures are shown:
      1. a scatter plot of water vs. total sugar, coloured by cluster;
      2. one horizontal bar chart per cluster with the share (in %) of its
         ten most frequent ``Keyword`` values.

    Args:
        num: Number of k-means clusters. Bar-chart grids exist for 1..9
            clusters; for larger values only the scatter plot is drawn.
        tempfunc: Table holding at least the columns 'Keyword', 'Name',
            'Water_(g)' and 'Sugar_Tot_(g)', row-aligned with the module-level
            ``newdf2``. The default is a copy of ``temp`` taken when the
            function is defined; it is never modified because the function
            works on a derived frame.
    """
    labels = fit_kmeans(newdf2, num)
    tempfunc = tempfunc.loc[:, ['Keyword', 'Name', 'Water_(g)', 'Sugar_Tot_(g)']].dropna()
    tempfunc['Label'] = labels

    # The palette has a fixed number of entries; wrap around instead of
    # raising KeyError when there are more clusters than colours.
    palette_size = len(colors)

    # --- figure 1: scatter plot of the clustered points -------------------
    plt.title('Food Dataset K-Means Clustering')
    plt.ylabel('Sugar(g)')
    plt.xlabel('Water(g)')

    for g in np.unique(labels):
        members = tempfunc[labels == g]
        plt.scatter(members['Water_(g)'], members['Sugar_Tot_(g)'],
                    c=colors[int(g) % palette_size],
                    label="Group " + str(g + 1),
                    alpha=0.3)
    plt.legend()
    plt.show()

    # --- figure 2: keyword composition of every cluster -------------------
    layout = _bar_chart_layout(num)
    if layout is None:
        return
    rows, columns, figsize, x_labelsize, y_labelsize = layout

    # squeeze=False always yields a 2-D array of Axes (also for a 1x1 grid);
    # ravel() flattens it row by row.
    axes = plt.subplots(rows, columns, figsize=figsize, squeeze=False)[1].ravel()

    for x in range(num):
        per_keyword = tempfunc[tempfunc['Label'] == x].groupby('Keyword')['Label'].count()

        # Ten most frequent keywords, expressed as a percentage of those ten.
        counter = per_keyword.sort_values().tail(10)
        counter = (counter / counter.sum()) * 100

        axis = axes[x]
        axis.barh(counter.index, counter, color=colors[x % palette_size])
        axis.set_title(f"Group {x + 1} in %", fontsize=26)
        axis.tick_params(axis="x", labelsize=x_labelsize)
        axis.tick_params(axis="y", labelsize=y_labelsize)

    plt.show()


# Disabled batch driver: the loop body breaks on its first iteration, so no
# chart is drawn. Replacing `break` with the call kept in the trailing comment
# would run make_charts for k = 2..8.
for x in range(2,9):
    break#make_charts(x)


def plot_score(tries: int) -> None:
    """Plot the k-means objective for k = 1 .. ``tries`` - 1 (an elbow plot).

    A separate ``KMeans`` model is fitted on the module-level ``newdf2`` for
    every cluster count and the absolute value of ``model.score`` is plotted
    against the number of centers.

    Args:
        tries: Exclusive upper bound for the number of centers to try.
    """
    centers = list(range(1, tries))

    # KMeans.score returns the negated objective (sum of squared distances of
    # the samples to their closest centroid), hence abs().
    scores = [abs(KMeans(center).fit(newdf2).score(newdf2)) for center in centers]

    plt.xticks(np.arange(0, tries, step=1))
    plt.plot(centers, scores)
    plt.title("scree Plot")
    plt.xlabel('Centers')
    # The plotted quantity is a sum of squared distances, not an average distance.
    plt.ylabel('Sum of Squared Distances to Centroid')
    plt.show()


#plot_score(20)

def exec2():
    """Show the exercise-2 figures.

    Three plotly figures are displayed in turn:
      1. a parallel-coordinates plot of selected nutrients, with one axis for
         the food keyword and lines coloured by quantile-transformed fibre;
      2. a distribution plot of the 'Water_(g)' column;
      3. a heatmap of the correlation coefficients between all columns of the
         module-level numeric ``df``.

    Side effect: the module-level ``temp2`` gets its 'Keyword' column converted
    to a categorical and a new integer 'Codes' column.
    """
    from sklearn import preprocessing
    import plotly.figure_factory as ff
    import numpy as np

    # The transformed fibre values drive the line colour; the colour bar below
    # is pinned to the range 0..1 via cmin/cmax.
    scaled = preprocessing.QuantileTransformer().fit_transform(temp2[['Fiber_TD_(g)']])
    scaled = pd.DataFrame(scaled, columns=['Fiber_TD_(g)'])

    # Parcoords needs numeric values, so every keyword is replaced by its
    # category code; the code -> keyword mapping supplies the tick labels.
    temp2['Keyword'] = temp2['Keyword'].astype('category')
    temp2['Codes'] = temp2['Keyword'].cat.codes
    code_to_keyword = (
        temp2[['Codes', 'Keyword']]
        .drop_duplicates()
        .sort_values('Codes')
        .set_index('Codes')['Keyword']
        .to_dict()
    )

    # (upper axis bound, column) for every numeric axis, in display order.
    numeric_axes = [
        (100, 'Sugar_Tot_(g)'),
        (89, 'Protein_(g)'),
        (33954, 'Beta_Carot_(µg)'),
        (68466, 'Vit_A_(µg)'),
        (15, 'Copper_(mg)'),
        (781, 'Magnesium_(mg)'),
        (100, 'Lipid_Tot_(g)'),
        (84, 'FA_Mono_(g)'),
        (902, 'Energy_(kcal)'),
        (100, 'Water_(g)'),
    ]

    # Tick positions come from the mapping itself rather than a hard-coded
    # range, so positions and labels always have the same length.
    dimensions = [dict(tickvals=list(code_to_keyword.keys()),
                       ticktext=list(code_to_keyword.values()),
                       label='FoodType', values=temp2['Codes'])]
    dimensions += [dict(range=[0, upper], label=column, values=temp2[column])
                   for upper, column in numeric_axes]

    fig = go.Figure(data=go.Parcoords(
        line=dict(color=scaled['Fiber_TD_(g)'],
                  colorscale='Viridis',
                  showscale=True,
                  cmin=0,
                  cmax=1,
                  colorbar=dict(title=dict(text="Fiber_TD_(g)",
                                           font=dict(size=16)),
                                tickmode="array",
                                tickvals=[0, 0.5, 1],
                                ticks='outside',
                                ticktext=['Almost None', 'Not a lot', 'A lot'],
                                # was the undefined name `yout14` (NameError)
                                tickfont=dict(size=14))),
        dimensions=dimensions
    ))
    fig.show()

    # --- Exercise 2: distribution plot and correlation heatmap ------------
    numpied_df = df.to_numpy()

    # Select the column by name so the data always matches its label.
    hist_data = [df['Water_(g)'].to_numpy()]
    group_labels = ['Water_(g)']

    # Create distplot with custom bin_size
    fig = ff.create_distplot(hist_data, group_labels, bin_size=.4, curve_type='normal')
    fig.update_layout(title_text='Normal Distribution Curve Plot')
    fig.show()

    # np.corrcoef treats rows as variables, hence the transpose.
    corr_data = np.corrcoef(numpied_df.T)
    column_categ = df.columns

    fig = go.Figure(data=go.Heatmap(
        z=corr_data, x=column_categ, y=column_categ, colorscale='greys'
    ))
    fig.update_layout(title={
        'text': 'Pearson Product-Moment Correlation Coefficients Heatmap',
        'font': {'size': 56, 'color': '#110a54'},
        'xanchor': 'center',
        'yanchor': 'top',
        'y': 0.975,
        'x': 0.5
    })
    fig.show()


#exec2()


def _scale_columns(frame, divisors=(), multipliers=()):
    """Rescale whole columns of ``frame`` in place.

    ``divisors`` and ``multipliers`` are iterables of (column, factor) pairs.
    Assigning to the column directly avoids chained indexing such as
    ``frame.loc[row][column] = ...``, which writes to a temporary object and
    can be silently lost.
    """
    for column, factor in divisors:
        frame[column] = frame[column] / factor
    for column, factor in multipliers:
        frame[column] = frame[column] * factor


def _show_radar(frame, row_names, axis_labels):
    """Show a radar chart with one filled trace per entry of ``row_names``.

    Each trace plots the row ``frame.loc[name]`` against ``axis_labels`` and
    uses the row name as legend entry.
    """
    fig = go.Figure()
    for row_name in row_names:
        fig.add_trace(go.Scatterpolar(
            r=frame.loc[row_name].values,
            theta=axis_labels,
            fill='toself',
            name=row_name
        ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
            )),
        showlegend=True
    )
    fig.show()


def exec3():
    """Show the exercise-3 figures.

    Reads 'USDA_Food_Database.xlsx' and displays, in order:
      1. a sunburst of all item names filed under the keywords DUCK and GOOSE;
      2. a radar chart comparing the mean nutrient profile of DUCK and GOOSE;
      3. a radar chart comparing three raw avocado entries;
      4. a radar chart comparing four blueberry entries.

    In the radar charts some columns are divided or multiplied by a constant
    so that all nutrients fit on one radial axis; the factor is part of the
    axis label (e.g. 'Water_(g)/3').
    """
    from itertools import groupby
    from operator import itemgetter

    df = pd.read_excel('USDA_Food_Database.xlsx')
    indexed = df.set_index(['Keyword', 'Name'])

    # Unique (Keyword, Name) pairs ordered by keyword; the ordering is required
    # because itertools.groupby only merges adjacent equal keys.
    keyword_name_pairs = list(indexed.sort_values('Keyword')["No"].to_dict().keys())

    # [(keyword, [name, name, ...]), ...]
    names_by_keyword = [(keyword, [name for _, name in pairs])
                        for keyword, pairs in groupby(keyword_name_pairs, itemgetter(0))]

    # --- 1. sunburst: DUCK / GOOSE and the items filed under them ---------
    picked_categories = ['GOOSE', 'DUCK']
    id_list = ['GOOSE', 'DUCK']
    swap_list = ["", ""]   # the two keywords themselves are root nodes

    # Record each item's parent while collecting it, instead of inferring it
    # afterwards from the position where the first keyword's items end.
    for keyword, names in names_by_keyword:
        if keyword in picked_categories:
            id_list += names
            swap_list += [keyword] * len(names)

    fig = go.Figure(go.Sunburst(
        labels=id_list,
        parents=swap_list,
        marker=dict(colors=['#e5f5f9', '#99d8c9'])
    ))
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))
    fig.show()

    # --- 2. radar: mean nutrient profile of DUCK vs GOOSE -----------------
    categories = ['Water_(g)', 'Energy_(kcal)', 'Protein_(g)', 'Lipid_Tot_(g)', 'Thiamin_(mg)', 'Niacin_(mg)',
                  'Selenium_(µg)', 'Iron_(mg)']
    poultry = df[df['Keyword'].isin(['DUCK', 'GOOSE'])]
    grouped = poultry.groupby('Keyword')[categories].agg('mean')

    _scale_columns(grouped,
                   divisors=[('Energy_(kcal)', 20), ('Water_(g)', 3)],
                   multipliers=[('Iron_(mg)', 2), ('Thiamin_(mg)', 50)])

    categories = ['Water_(g)/3', 'Energy_(kcal)/20', 'Protein_(g)', 'Lipid_Tot_(g)', 'Thiamin_(mg)*50', 'Niacin_(mg)',
                  'Selenium_(µg)', 'Iron_(mg)*2']
    _show_radar(grouped, ['DUCK', 'GOOSE'], categories)

    # --- 3. radar: raw avocado entries ------------------------------------
    avocado_names = ['AVOCADOS,RAW,ALL COMM VAR', 'AVOCADOS,RAW,CALIFORNIA', 'AVOCADOS,RAW,FLORIDA']
    categories = ['Water_(g)', 'Energy_(kcal)', 'Lipid_Tot_(g)', 'Carbohydrt_(g)', 'Sugar_Tot_(g)', 'Phosphorus_(mg)',
                  'Potassium_(mg)', 'Vit_B6_(mg)', 'Thiamin_(mg)', 'Niacin_(mg)', 'Iron_(mg)', 'Vit_E_(mg)']
    categoriediv = [('Water_(g)', 5), ('Energy_(kcal)', 10), ('Phosphorus_(mg)', 5), ('Potassium_(mg)', 50)]
    categoriemul = [('Sugar_Tot_(g)', 4), ('Vit_B6_(mg)', 10), ('Thiamin_(mg)', 100), ('Iron_(mg)', 10)]

    df4 = df[df['Name'].isin(avocado_names)].groupby('Name')[categories].agg('mean')
    _scale_columns(df4, divisors=categoriediv, multipliers=categoriemul)

    categories = ['Water_(g)/5', 'Energy_(kcal)/10', 'Lipid_Tot_(g)', 'Carbohydrt_(g)', 'Sugar_Tot_(g)*4', 'Phosphorus_(mg)/5',
                  'Potassium_(mg)/50', 'Vit_B6_(mg)*10', 'Thiamin_(mg)*100', 'Niacin_(mg)', 'Iron_(mg)*10', 'Vit_E_(mg)']
    _show_radar(df4, avocado_names, categories)

    # --- 4. radar: blueberry entries --------------------------------------
    blueberry_names = ['BLUEBERRIES,FRZ,SWTND', 'BLUEBERRIES,FRZ,UNSWTND', 'BLUEBERRIES,RAW', 'BLUEBERRIES,WILD,FRZ']
    categories = ['Fiber_TD_(g)', 'Calcium_(mg)', 'Iron_(mg)',
                  'Phosphorus_(mg)', 'Sodium_(mg)', 'Zinc_(mg)', 'Manganese_(mg)', 'Vit_A_(µg)']
    categoriemul = [('Iron_(mg)', 10), ('Sodium_(mg)', 2), ('Zinc_(mg)', 20), ('Manganese_(mg)', 5)]
    categoriediv = [('Vit_A_(µg)', 5)]

    df5 = df[df['Name'].isin(blueberry_names)].groupby('Name')[categories].agg('mean')
    print(df5)   # values before rescaling

    categoriesren = ['Fiber_TD_(g)', 'Calcium_(mg)', 'Iron_(mg)*10',
                     'Phosphorus_(mg)', 'Sodium_(mg)*2', 'Zinc_(mg)*20', 'Manganese_(mg)*5', 'Vit_A_(µg)/5']

    _scale_columns(df5, divisors=categoriediv, multipliers=categoriemul)
    print(df5)   # values after rescaling

    _show_radar(df5, blueberry_names, categoriesren)

# Module-level entry point: executing (or importing) this file calls exec3,
# which reads the spreadsheet and shows its figures.
exec3()
# Item names used by the blueberry chart:
#'BLUEBERRIES,FRZ,SWTND', 'BLUEBERRIES,FRZ,UNSWTND', 'BLUEBERRIES,RAW', 'BLUEBERRIES,WILD,FRZ',
	import numpy as np
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	from sklearn.cluster import KMeans
	from sklearn.datasets import make_blobs
	import pandas as pd
	from sklearn import preprocessing as p
	import plotly.graph_objects as go

	pd.set_option('display.max_columns', 80)
	pd.set_option('display.max_rows', 300)
	pd.set_option('display.width', 1000)


	from matplotlib import colors
	colors = {0:'#1b9e77', 1:'#d95f02', 2:'#7570b3', 3:'#e7298a', 4:'#66a61e', 5:'#e6ab02', 6:'#a6761d', 7:'#666666'}

	df = pd.read_excel('USDA_Food_Database.xlsx')

	temp = df.copy().dropna() #earlycope to not remove keword and name colmn
	temp2 = df.copy().dropna()
	df.drop(df[['No', 'Name', 'Keyword']], axis=1, inplace=True)
	cols = df.columns
	df.dropna(inplace=True)


	puh2=p.MinMaxScaler().fit_transform(df) # numpy.Narray columnsnames get lost flatten out
	newdf2 = pd.DataFrame(puh2) #transform back into df
	newdf2.columns = cols #give columns names


	newdf2 = newdf2.loc[:,['Water_(g)', 'Sugar_Tot_(g)']].dropna() #locate only 2 features we want to cluister

	def fit_kmeans(data, centers):
	kmeans = KMeans(centers)
	labels = kmeans.fit_predict(data)
	return labels


	def make_charts(num, tempfunc=temp.copy()):

	"""makes first scatterplot"""
	labels = fit_kmeans(newdf2, num)
	tempfunc = tempfunc.loc[:, ['Keyword', 'Name', 'Water_(g)', 'Sugar_Tot_(g)']].dropna()
	tempfunc['Label'] = labels

	plt.title('Food Dataset K-Means Clustering')
	plt.ylabel('Sugar(g)')
	plt.xlabel('Water(g)')

	for g in np.unique(labels):
	ix = np.where(labels == g)
	ix = ix[0]
	plt.scatter(tempfunc.iloc[ix]['Water_(g)'], tempfunc.iloc[ix]['Sugar_Tot_(g)'], c=colors[g],
	label="Group " + str(g + 1),
	alpha=0.3)
	plt.legend()
	plt.show()

	"""creates the fig for the food type"""
	if num < 4:
	myplot = plt.subplots(num, 1, figsize=(14, 16))[1]

	for x in range(0, num):
	yo = tempfunc[tempfunc['Label'] == x].groupby('Keyword').count()

	counter = yo['Label'].sort_values().tail(10)
	counter = (counter / counter.sum()) * 100

	myplot[x].barh(counter.index, counter, color=colors[x])
	text_kwargs = dict(fontsize=26)
	myplot[x].set_title(f"Group {x + 1} in %", **text_kwargs)
	myplot[x].tick_params(axis="x", labelsize=30)
	myplot[x].tick_params(axis="y", labelsize=20)

	plt.show()
	elif num == 4:
	myplot = plt.subplots(2, 2, figsize=(24, 14))[1]
	myplot2 = np.append(myplot[0], myplot[1])

	for x in range(0, num):
	yo = tempfunc[tempfunc['Label'] == x].groupby('Keyword').count()

	counter = yo['Label'].sort_values().tail(10)
	counter = (counter / counter.sum()) * 100

	myplot2[x].barh(counter.index, counter, color=colors[x])
	text_kwargs = dict(fontsize=26)
	myplot2[x].set_title(f"Group {x + 1} in %", **text_kwargs)
	myplot2[x].tick_params(axis="x", labelsize=30)
	myplot2[x].tick_params(axis="y", labelsize=20)
	plt.show()

	elif num > 4 and num < 7:
	myplot = plt.subplots(3, 2, figsize=(24, 14))[1]
	myplot2 = [x for sublot in myplot for x in sublot]


	for x in range(0, num):
	yo = tempfunc[tempfunc['Label'] == x].groupby('Keyword').count()

	counter = yo['Label'].sort_values().tail(10)
	counter = (counter / counter.sum()) * 100

	myplot2[x].barh(counter.index, counter, color=colors[x])
	text_kwargs = dict(fontsize=26)
	myplot2[x].set_title(f"Group {x + 1} in %", **text_kwargs)
	myplot2[x].tick_params(axis="x", labelsize=24)
	myplot2[x].tick_params(axis="y", labelsize=18)

	plt.show()

	elif num > 6 and num < 10:
	myplot = plt.subplots(3, 3, figsize=(34, 18))[1]
	myplot2 = [x for sublot in myplot for x in sublot]

	for x in range(0, num):
	yo = tempfunc[tempfunc['Label'] == x].groupby('Keyword').count()

	counter = yo['Label'].sort_values().tail(10)
	counter = (counter / counter.sum()) * 100

	myplot2[x].barh(counter.index, counter, color=colors[x])
	text_kwargs = dict(fontsize=26)
	myplot2[x].set_title(f"Group {x + 1} in %", **text_kwargs)
	myplot2[x].tick_params(axis="x", labelsize=24)
	myplot2[x].tick_params(axis="y", labelsize=19)

	plt.show()


	for x in range(2,9):
	break#make_charts(x)


	def plot_score(tries: int):
	scores = []

	for center in range(1, tries):
	kmeans = KMeans(center)
	model = kmeans.fit(newdf2)
	scores.append(abs(model.score(newdf2)))

	centers = list(range(1, tries))

	plt.xticks(np.arange(0, tries, step=1))
	plt.plot(centers, scores)
	plt.title("scree Plot")
	plt.xlabel('Centers')
	plt.ylabel('Average DIstance From Centroid')
	plt.show()


	#plot_score(20)

	def exec2():

	from sklearn import preprocessing


	scaled = preprocessing.QuantileTransformer().fit_transform(temp2[['Fiber_TD_(g)']])
	scaled = pd.DataFrame(scaled, columns=['Fiber_TD_(g)'])

	temp2['Keyword'] = temp2['Keyword'].astype('category')

	temp2['Codes'] = temp2[['Keyword']].apply(lambda x: x.cat.codes)
	unique_codes = temp2[['Codes', 'Keyword']].drop_duplicates() #inplace false
	unique_codes = unique_codes.sort_values('Codes').set_index('Codes').to_dict()['Keyword']


	fig = go.Figure(data=go.Parcoords(
	line=dict(color = scaled['Fiber_TD_(g)'],
	colorscale = 'Viridis',
	showscale = True,
	cmin = 0,
	cmax=1,
	colorbar=dict(title=dict(text="Fiber_TD_(g)",
	font=dict(size=16)),
	tickmode="array",
	tickvals=[0,0.5,1],
	ticks='outside',
	ticktext=['Almost None', 'Not a lot', 'A lot'],
	tickfont=dict(size=yout14))),
	dimensions=list([
	dict(tickvals=[x for x in range(0,36)], ticktext=list(unique_codes.values()),
	label = 'FoodType', values=temp2['Codes']), #danger
	dict(range=[0, 100], label='Sugar_Tot_(g)', values=temp2['Sugar_Tot_(g)']),
	dict(range=[0, 89], label='Protein_(g)', values=temp2['Protein_(g)']),
	dict(range=[0, 33954], label='Beta_Carot_(µg)', values=temp2['Beta_Carot_(µg)']),
	dict(range=[0, 68466], label='Vit_A_(µg)', values=temp2['Vit_A_(µg)']),
	dict(range=[0, 15], label='Copper_(mg)', values=temp2['Copper_(mg)']),
	dict(range=[0, 781], label='Magnesium_(mg)', values=temp2['Magnesium_(mg)']),
	dict(range=[0, 100], label='Lipid_Tot_(g)', values=temp2['Lipid_Tot_(g)']),
	dict(range=[0, 84],
	label='FA_Mono_(g)', values=temp2['FA_Mono_(g)']),
	dict(range=[0, 902], label='Energy_(kcal)', values=temp2['Energy_(kcal)']),
	dict(range=[0, 100],
	label='Water_(g)', values=temp2['Water_(g)']),
	])
	))
	fig.show()
	"""Excercise 2"""

	import plotly.figure_factory as ff
	import numpy as np

	numpied_df = df.to_numpy()


	hist_data = [numpied_df.T[0]]

	group_labels = ['Water_(g)']

	# Create distplot with custom bin_size
	fig = ff.create_distplot(hist_data, group_labels, bin_size=.4, curve_type='normal')
	fig.update_layout(title_text='Normal Distribution Curve Plot')
	fig.show()


	cov_data = np.corrcoef(numpied_df.T)
	column_categ = df.columns

	fig = go.Figure(data=go.Heatmap(
	z=cov_data,x=column_categ, y=column_categ, colorscale='greys'
	))
	fig.update_layout(title={
	'text':'Pearson Product-Moment Correlation Coefficients Heatmap',
	'font':{'size':56, 'color':'#110a54'},
	'xanchor':'center',
	'yanchor':'top',
	'y':0.975,
	'x':0.5


	})
	fig.show()



	#exec2()


	def exec3():
	from itertools import groupby
	from operator import itemgetter
	df = pd.read_excel('USDA_Food_Database.xlsx')
	#print(df.head()) #inspect some first data
	#print(df.info()) #find memory usage and the null values on specific columns
	#print(df.describe(include=[np.number, 'object']).T) # find unique keywords and percentiles and top keywords(Vegies)
	df3 = df.set_index(['Keyword', 'Name'])


	#print(df.count(level='Keyword'))
	df_index_combination_tuple = df3.sort_values('Keyword')["No"].to_dict().keys()
	df_index_combination_tuple = list(df_index_combination_tuple)


	#Creates list of subvalues f.e. [Alcohol,[highpercentagealc,lowpercentagealc,alc from companyA,alc from companyB]]
	df_names_combination_nested_list_tuple = [(k, [x for _, x in g]) for k, g in groupby(df_index_combination_tuple, itemgetter(0))]
	#spectate subvalues


	"""Exercice 3 pick 1 failed try"""
	picked_categories = ['GOOSE', 'DUCK']
	id_list = ['GOOSE', 'DUCK']
	swap_list =["", ""]

	split_here = []
	for category in df_names_combination_nested_list_tuple:
	if category[0] in picked_categories:
	id_list += category[1]
	split_here.append(len(id_list))

	for k in range(2, len(id_list)):

	if k < split_here[0]:
	swap_list.append("DUCK")
	else:
	swap_list.append("GOOSE")

	fig=go.Figure(go.Sunburst(
	labels=id_list,
	parents=swap_list,
	marker=dict(colors=['#e5f5f9', '#99d8c9'])
	))
	fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))
	fig.show()

	"""Ex. 3 non failed try"""

	categories = ['Water_(g)', 'Energy_(kcal)', 'Protein_(g)', 'Lipid_Tot_(g)', 'Thiamin_(mg)', 'Niacin_(mg)',
	'Selenium_(µg)', 'Iron_(mg)']
	df3 = df.loc[:, categories + ['Keyword', 'Name']].copy()
	criteria = (df.Keyword == 'DUCK') \| (df.Keyword == 'GOOSE')
	df3 = df3[criteria]
	""""min max mean median sum count std var
	size describe nunique idxmin idxmax"""
	grouped = df3.groupby('Keyword')[categories].agg('mean')

	grouped.loc['DUCK']['Energy_(kcal)'] = grouped.loc['DUCK']['Energy_(kcal)']/20
	grouped.loc['GOOSE']['Energy_(kcal)'] = grouped.loc['GOOSE']['Energy_(kcal)'] / 20

	grouped.loc['DUCK']['Water_(g)'] = grouped.loc['DUCK']['Water_(g)'] / 3
	grouped.loc['GOOSE']['Water_(g)'] = grouped.loc['GOOSE']['Water_(g)'] / 3

	grouped.loc['DUCK']['Iron_(mg)'] = grouped.loc['DUCK']['Iron_(mg)'] * 2
	grouped.loc['GOOSE']['Iron_(mg)'] = grouped.loc['GOOSE']['Iron_(mg)'] * 2

	grouped.loc['DUCK']['Thiamin_(mg)'] = grouped.loc['DUCK']['Thiamin_(mg)'] * 50
	grouped.loc['GOOSE']['Thiamin_(mg)'] = grouped.loc['GOOSE']['Thiamin_(mg)'] * 50

	categories = ['Water_(g)/3', 'Energy_(kcal)/20', 'Protein_(g)', 'Lipid_Tot_(g)', 'Thiamin_(mg)*50', 'Niacin_(mg)',
	'Selenium_(µg)', 'Iron_(mg)*2']


	fig = go.Figure()
	fig.add_trace(go.Scatterpolar(
	r=grouped.loc['DUCK'].values,
	theta=categories,
	fill='toself',
	name='DUCK'
	))
	fig.add_trace(go.Scatterpolar(
	r=grouped.loc['GOOSE'].values,
	theta=categories,
	fill='toself',
	name='GOOSSE'
	))
	fig.update_layout(
	polar=dict(
	radialaxis=dict(
	visible=True,
	)),
	showlegend=True
	)

	fig.show()

	"""AVOCADO"""
	categories = ['Water_(g)', 'Energy_(kcal)', 'Lipid_Tot_(g)', 'Carbohydrt_(g)','Sugar_Tot_(g)', 'Phosphorus_(mg)',
	'Potassium_(mg)', 'Vit_B6_(mg)', 'Thiamin_(mg)', 'Niacin_(mg)', 'Iron_(mg)', 'Vit_E_(mg)']

	categoriediv = [('Water_(g)',5), ('Energy_(kcal)', 10), ('Phosphorus_(mg)', 5), ('Potassium_(mg)', 50)]
	categoriemul = [('Sugar_Tot_(g)', 4), ('Vit_B6_(mg)', 10), ('Thiamin_(mg)', 100), ('Iron_(mg)', 10)]
	df4 = df.loc[:, categories + ['Keyword', 'Name']].copy() #categories

	criteria = (df.Name == 'AVOCADOS,RAW,ALL COMM VAR') \| (df.Name == 'AVOCADOS,RAW,CALIFORNIA') \|\
	(df.Name == 'AVOCADOS,RAW,FLORIDA')
	df4 = df4[criteria]
	df4 = df4.groupby('Name')[categories].agg('mean')
	categories = ['Water_(g)/5', 'Energy_(kcal)/10', 'Lipid_Tot_(g)', 'Carbohydrt_(g)', 'Sugar_Tot_(g)*4', 'Phosphorus_(mg)/5',
	'Potassium_(mg)/50', 'Vit_B6_(mg)10', 'Thiamin_(mg)100', 'Niacin_(mg)', 'Iron_(mg)*10', 'Vit_E_(mg)']

	for cat in categoriediv:
	df4.loc['AVOCADOS,RAW,ALL COMM VAR', cat[0]] = df4.loc['AVOCADOS,RAW,ALL COMM VAR'][cat[0]] / cat[1]
	df4.loc['AVOCADOS,RAW,CALIFORNIA', cat[0]] = df4.loc['AVOCADOS,RAW,CALIFORNIA'][cat[0]] / cat[1]
	df4.loc['AVOCADOS,RAW,FLORIDA', cat[0]] = df4.loc['AVOCADOS,RAW,FLORIDA'][cat[0]] / cat[1]
	for cat in categoriemul:
	df4.loc['AVOCADOS,RAW,ALL COMM VAR', cat[0]] = df4.loc['AVOCADOS,RAW,ALL COMM VAR'][cat[0]] * cat[1]
	df4.loc['AVOCADOS,RAW,CALIFORNIA', cat[0]] = df4.loc['AVOCADOS,RAW,CALIFORNIA'][cat[0]] * cat[1]
	df4.loc['AVOCADOS,RAW,FLORIDA', cat[0]] = df4.loc['AVOCADOS,RAW,FLORIDA'][cat[0]] * cat[1]



	fig = go.Figure()
	fig.add_trace(go.Scatterpolar(
	r=df4.loc['AVOCADOS,RAW,ALL COMM VAR'].values,
	theta=categories,
	fill='toself',
	name='AVOCADOS,RAW,ALL COMM VAR'
	))
	fig.add_trace(go.Scatterpolar(
	r=df4.loc['AVOCADOS,RAW,CALIFORNIA'].values,
	theta=categories,
	fill='toself',
	name='AVOCADOS,RAW,CALIFORNIA'
	))

	fig.add_trace(go.Scatterpolar(
	r=df4.loc['AVOCADOS,RAW,FLORIDA'].values,
	theta=categories,
	fill='toself',
	name='AVOCADOS,RAW,FLORIDA'
	))

	fig.update_layout(
	polar=dict(
	radialaxis=dict(
	visible=True,
	)),
	showlegend=True
	)

	fig.show()

	"""'BLUEBERRIES,FRZ,SWTND', 'BLUEBERRIES,FRZ,UNSWTND', 'BLUEBERRIES,RAW', 'BLUEBERRIES,WILD,FRZ'"""
	categories = ['Fiber_TD_(g)', 'Calcium_(mg)', 'Iron_(mg)',
	'Phosphorus_(mg)', 'Sodium_(mg)', 'Zinc_(mg)', 'Manganese_(mg)', 'Vit_A_(µg)']
	categoriemul = [('Iron_(mg)', 10), ('Sodium_(mg)', 2), ('Zinc_(mg)', 20), ('Manganese_(mg)', 5)]
	categoriediv = [('Vit_A_(µg)', 5)]

	criteria = (df.Name == 'BLUEBERRIES,FRZ,SWTND') \| (df.Name == 'BLUEBERRIES,FRZ,UNSWTND') \| \
	(df.Name == 'BLUEBERRIES,RAW') \| (df.Name == 'BLUEBERRIES,WILD,FRZ')
	df5 = df.loc[:, categories + ['Name']].copy()
	df5 = df5[criteria]

	df5 = df5.groupby('Name')[categories].agg('mean')
	print(df5)

	categoriesren = ['Fiber_TD_(g)', 'Calcium_(mg)', 'Iron_(mg)*10',
	'Phosphorus_(mg)', 'Sodium_(mg)2','Zinc_(mg)20', 'Manganese_(mg)*5', 'Vit_A_(µg)/5']

	for cat in categoriediv:
	df5.loc['BLUEBERRIES,FRZ,SWTND', cat[0]] = df5.loc['BLUEBERRIES,FRZ,SWTND'][cat[0]] / cat[1]
	df5.loc['BLUEBERRIES,FRZ,UNSWTND', cat[0]] = df5.loc['BLUEBERRIES,FRZ,UNSWTND'][cat[0]] / cat[1]
	df5.loc['BLUEBERRIES,RAW', cat[0]] = df5.loc['BLUEBERRIES,RAW'][cat[0]] / cat[1]
	df5.loc['BLUEBERRIES,WILD,FRZ', cat[0]] = df5.loc['BLUEBERRIES,WILD,FRZ'][cat[0]] / cat[1]

	for cat in categoriemul:
	df5.loc['BLUEBERRIES,FRZ,SWTND', cat[0]] = df5.loc['BLUEBERRIES,FRZ,SWTND'][cat[0]] * cat[1]
	df5.loc['BLUEBERRIES,FRZ,UNSWTND', cat[0]] = df5.loc['BLUEBERRIES,FRZ,UNSWTND'][cat[0]] * cat[1]
	df5.loc['BLUEBERRIES,RAW', cat[0]] = df5.loc['BLUEBERRIES,RAW'][cat[0]] * cat[1]
	df5.loc['BLUEBERRIES,WILD,FRZ', cat[0]] = df5.loc['BLUEBERRIES,WILD,FRZ'][cat[0]] * cat[1]

	print(df5)
	fig = go.Figure()
	fig.add_trace(go.Scatterpolar(
	r=df5.loc['BLUEBERRIES,FRZ,SWTND'].values,
	theta=categoriesren,
	fill='toself',
	name='BLUEBERRIES,FRZ,SWTND'
	))
	fig.add_trace(go.Scatterpolar(
	r=df5.loc['BLUEBERRIES,FRZ,UNSWTND'].values,
	theta=categoriesren,
	fill='toself',
	name='BLUEBERRIES,FRZ,UNSWTND'
	))

	fig.add_trace(go.Scatterpolar(
	r=df5.loc['BLUEBERRIES,RAW'].values,
	theta=categoriesren,
	fill='toself',
	name='BLUEBERRIES,RAW'
	))

	fig.add_trace(go.Scatterpolar(
	r=df5.loc['BLUEBERRIES,WILD,FRZ'].values,
	theta=categoriesren,
	fill='toself',
	name='BLUEBERRIES,WILD,FRZ'
	))

	fig.update_layout(
	polar=dict(
	radialaxis=dict(
	visible=True,
	)),
	showlegend=True
	)

	fig.show()

	exec3()
	#'BLUEBERRIES,FRZ,SWTND', 'BLUEBERRIES,FRZ,UNSWTND', 'BLUEBERRIES,RAW', 'BLUEBERRIES,WILD,FRZ',