Skip to content

Instantly share code, notes, and snippets.

View srang992's full-sized avatar

Subhradeep Rang srang992

  • India
View GitHub Profile
# Navigate to your desired drive or directory. Here I am using the D: drive
$ d:
# make the directory named 'ChromeCustomHomepage'
$ mkdir ChromeCustomHomepage
# Navigate to the newly created Directory
$ cd ChromeCustomHomepage
# install 'virtualenv' if you don't have it installed
# Trim surrounding spaces, then break every 'ingredients' entry at '-'.
# The piece before the dash is stored as 'num_ingredients'; the piece after
# it, trimmed again, is stored as 'main_ingredients'.
choco_data['ingredients'] = choco_data['ingredients'].str.strip(' ')
ingredient_parts = choco_data['ingredients'].str.split('-', expand=True)
choco_data['num_ingredients'] = ingredient_parts[0]
choco_data['main_ingredients'] = ingredient_parts[1].str.strip(' ')
# Build one indicator column per comma-separated value of 'main_ingredients'.
ingre_encode = choco_data['main_ingredients'].str.get_dummies(sep=',')
# concatenating the lecithin column with the main data; a value of 1 denotes that the bar contains lecithin
# Fold the misspelled / variant taste columns into their canonical column by
# adding their values together (left to right), then remove the variants.
variant_columns = {
    'nutty': ['nut', 'nuts', 'nutty'],
    'woody': ['woodsy', 'woody'],
    'earthy': ['earth', 'earthy'],
}
for canonical, variants in variant_columns.items():
    combined = taste_codo[variants[0]]
    for variant in variants[1:]:
        combined = combined + taste_codo[variant]
    taste_codo[canonical] = combined
taste_codo.drop(columns=['nut', 'nuts', 'woodsy', 'earth'], inplace=True)
# making the taste dictionary
tasty_dict = {}
tasty_list = list(taste_codo.columns)
for taste in tasty_list:
# Keep only the rows whose 'manufacturer' value is 'Soma'.
made_by_soma = choco_data['manufacturer'].isin(['Soma'])
soma_choco_data = choco_data[made_by_soma]
# 'bean_origin' value of every Soma row, plus an empty dictionary that will
# hold the per-origin counts.
bean_origins = list(soma_choco_data['bean_origin'])
bean_dict = {}
for origin in bean_origins:
if origin in bean_dict:
bean_dict[origin] += 1
else:
from collections import Counter

# All cocoa percentages used in the Soma chocolates, one entry per row.
cocoa_list = list(soma_choco_data['cocoa_percent'])
# Count how often each percentage occurs. Keys are the percentages converted
# to strings. Counter keeps keys in first-seen order, and dict() turns the
# result back into a plain dictionary so later code sees the same type.
cocoa_percent_dict = dict(Counter(map(str, cocoa_list)))
# Drop manufacturers with fewer than 10 entries, i.e. keep a count of at
# least 10. The comparison is `>= 10` so that manufacturers with exactly 10
# entries are kept, matching the cut-off used for bar names.
# NOTE(review): count_df is defined elsewhere; presumably it adds a 'count'
# column holding the number of rows per manufacturer -- confirm.
choco_data_with_sec_count = count_df(choco_data, 'manufacturer')
choco_data_mod2 = choco_data_with_sec_count[choco_data_with_sec_count['count'] >= 10]
# Mean rating per manufacturer, reshaped into a frame with the columns
# 'Company' and 'Rating'.
avg_rating_by_company = choco_data_mod2.groupby('manufacturer')['rating'].mean()
avg_rating_by_company_df = avg_rating_by_company.rename_axis('Company').reset_index(name='Rating')
# Ten companies with the highest mean rating, best first.
avg_rating_by_company_df_sorted = avg_rating_by_company_df.sort_values(by='Rating', ascending=False).head(10)
# adding title and plotting the data
# Names of all taste columns in the encoded frame.
tastes = list(taste_encode.columns)
# Map every taste to the total of its column, i.e. how many entries mention it.
taste_dict = {taste: sum(taste_encode[taste]) for taste in tastes}
# Sort the taste dictionary in descending order.
# NOTE(review): sort_sliced_dict is defined elsewhere; presumably
# item_count=8 keeps only the first 8 entries after sorting -- confirm.
taste_dict = sort_sliced_dict(taste_dict, is_reverse=True, item_count=8)
# Select the 'taste' text of every 'Kokoa Kamili' bar and expand it into one
# indicator column per ', '-separated taste.
taste_encode = choco_data[choco_data['bar_name'].isin(['Kokoa Kamili'])]['taste'].str.get_dummies(sep=', ')
# Merge misspelled / variant taste names into a single column each.
taste_encode['nuts'] = taste_encode['nut'] + taste_encode['nuts']
taste_encode['rich_cocoa'] = taste_encode['rich'] + taste_encode['rich cocoa'] + taste_encode['rich cooa']
# Drop every column that was folded into another one. 'rich cocoa' is dropped
# too: its values are already part of 'rich_cocoa', so keeping it would count
# the same entries under two separate tastes.
taste_encode.drop(['nut', 'rich', 'rich cocoa', 'rich cooa'], axis=1, inplace=True)
from collections import Counter

# Company location of every row whose bar name is 'Kokoa Kamili'.
company_loc_list = list(choco_data[choco_data['bar_name'].isin(['Kokoa Kamili'])]['company_loc'])
# Occurrences per location, counted in a single pass instead of calling
# list.count() once per element. Keys stay in first-seen order and the result
# is a plain dictionary.
company_loc_dict = dict(Counter(company_loc_list))
# Pie chart of the location counts.
fig = px.pie(
    values=list(company_loc_dict.values()),
    names=list(company_loc_dict.keys()),
    title='Most Common Location where Kokoa Kamili is Manufactured',
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
)
import plotly.express as px
import plotly.graph_objects as go
# Drop the chocolates whose count is below 10.
# NOTE(review): count_df is defined elsewhere; presumably it adds a 'count'
# column holding the number of rows per bar name -- confirm.
choco_data_with_count = count_df(choco_data, 'bar_name')
has_enough_entries = choco_data_with_count['count'].ge(10)
choco_data_mod = choco_data_with_count[has_enough_entries]
# Mean rating per bar name, reshaped into a frame with the columns
# 'bar_name' and 'rating'.
ratings_by_bar = choco_data_mod.groupby('bar_name')['rating']
avg_rating_by_bar = ratings_by_bar.mean()
avg_rating_by_bar_df = (
    avg_rating_by_bar
    .rename_axis('bar_name')
    .reset_index(name='rating')
)