Subhradeep Rang srang992

## output.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                srang992
                / output.md
            
            
              Created
              September 28, 2024 14:34
            
          
    To read a CSV (Comma-Separated Values) file into a Pandas DataFrame, you can use the read_csv function from the pandas library. Here's how you can do it:


First, make sure you have the pandas library installed. If not, you can install it using pip:
pip install pandas


Use the following code to import a CSV file into a DataFrame:


## requirements.txt
langchain
langchain-groq
pymupdf
huggingface-hub
faiss-cpu
sentence-transformers

## project_setup.sh
# Navigate to your desired drive or directory. Here I am using the D: drive
$ d:

# Create a directory named 'ChromeCustomHomepage'
$ mkdir ChromeCustomHomepage

# Navigate to the newly created Directory
$ cd ChromeCustomHomepage

# install 'virtualenv' if you don't have it installed

## lecithin.py
# Trim surrounding spaces, then split each 'ingredients' value on '-':
# column 0 of the split goes to 'num_ingredients', column 1 to 'main_ingredients'.
choco_data['ingredients'] = choco_data['ingredients'].str.strip(' ')
ingredient_parts = choco_data['ingredients'].str.split('-', expand=True)
choco_data['num_ingredients'] = ingredient_parts[0]
choco_data['main_ingredients'] = ingredient_parts[1].str.strip(' ')

# Build one indicator column per comma-separated entry of 'main_ingredients'.
ingre_encode = choco_data['main_ingredients'].str.get_dummies(sep=',')

# Next step: concatenate the lecithin column with the main data (1 = contains lecithin)

## taste_misspelled.py
# Fold the misspelled / variant taste columns into their canonical column by
# adding the columns together, then drop the variants that were folded in.
taste_codo['nutty'] = taste_codo['nut'].add(taste_codo['nuts']).add(taste_codo['nutty'])
taste_codo['woody'] = taste_codo['woodsy'].add(taste_codo['woody'])
taste_codo['earthy'] = taste_codo['earth'].add(taste_codo['earthy'])
taste_codo.drop(columns=['nut', 'nuts', 'woodsy', 'earth'], inplace=True)

# making the taste dictionary
tasty_dict = {}
tasty_list = list(taste_codo.columns)
for taste in tasty_list:

## how_much_beans.py
# Keep only the rows whose 'manufacturer' value is 'Soma'.
is_soma = choco_data['manufacturer'].isin(['Soma'])
soma_choco_data = choco_data[is_soma]

# Bean origins of those rows, plus the dictionary the loop below fills with
# one counter per origin.
bean_origins = list(soma_choco_data['bean_origin'])
bean_dict = {}
for origin in bean_origins:
  if origin in bean_dict:
    bean_dict[origin] += 1
  else:

## how_much_cocoa.py
# Collect every 'cocoa_percent' value found in soma_choco_data.
cocoa_list = list(soma_choco_data['cocoa_percent'])

# Count occurrences of each value. Keys are the values converted to str;
# a key that has not been seen yet starts from 0.
cocoa_percent_dict = {}
for cocoa_percent in cocoa_list:
  percent_key = str(cocoa_percent)
  cocoa_percent_dict[percent_key] = cocoa_percent_dict.get(percent_key, 0) + 1

## good_manufacturer.py
# Keep only the rows whose 'count' is strictly greater than 10 (a count of
# exactly 10 is removed as well).
# NOTE(review): count_df is defined elsewhere; presumably it attaches a
# per-manufacturer 'count' column -- confirm against its definition.
choco_data_with_sec_count = count_df(choco_data, 'manufacturer')
has_enough_entries = choco_data_with_sec_count['count'] > 10
choco_data_mod2 = choco_data_with_sec_count[has_enough_entries]

# Mean rating per manufacturer, reshaped into a frame with the columns
# 'Company' and 'Rating'.
avg_rating_by_company = choco_data_mod2.groupby('manufacturer')['rating'].mean()
avg_rating_by_company_df = (
    avg_rating_by_company
    .rename_axis('Company')
    .reset_index(name='Rating')
)

# Ten highest mean ratings, best first.
ranked_by_rating = avg_rating_by_company_df.sort_values(by='Rating', ascending=False)
avg_rating_by_company_df_sorted = ranked_by_rating.head(10)

# adding title and plotting the data

## taste_plot.py
# Map every taste column to the sum of its values, i.e. how many entries
# mention that taste.
tastes = list(taste_encode.columns)
taste_dict = {taste: sum(taste_encode[taste]) for taste in tastes}

# Sort the totals in descending order and keep 8 of them.
# NOTE(review): sort_sliced_dict is defined elsewhere; its behaviour is
# inferred from the keyword arguments -- confirm against its definition.
taste_dict = sort_sliced_dict(taste_dict, is_reverse=True, item_count=8)

## Taste_split.py
# Select the 'taste' values of the 'Kokoa Kamili' bars and turn them into one
# indicator column per ', '-separated taste.
taste_encode = choco_data[choco_data['bar_name'].isin(['Kokoa Kamili'])]['taste'].str.get_dummies(sep=', ')

# Merge misspelled / variant columns into a single column per taste.
taste_encode['nuts'] = taste_encode['nut'] + taste_encode['nuts']
taste_encode['rich_cocoa'] = taste_encode['rich'] + taste_encode['rich cocoa'] + taste_encode['rich cooa']

# Drop every column that was folded into another one. 'rich cocoa' is part of
# the list because its values now live in 'rich_cocoa'; leaving it in place
# would count the same taste twice once the columns are summed.
taste_encode.drop(['nut', 'rich', 'rich cocoa', 'rich cooa'], axis=1, inplace=True)
	langchain
	langchain-groq
	pymupdf
	huggingface-hub
	faiss-cpu
	sentence-transformers
	# Navigate to your desired directory. Here I am using D
	$ d:

	# make the directory named 'ChromeCustomHomepage'
	$ mkdir ChromeCustomHomepage

	# Navigate to the newly created Directory
	$ cd ChromeCustomHomepage

	# install 'virtualenv' if you don't have it installed
	# Trim surrounding spaces, then split each 'ingredients' value on '-':
	# column 0 of the split goes to 'num_ingredients', column 1 to 'main_ingredients'.
	choco_data['ingredients'] = choco_data['ingredients'].str.strip(' ')
	choco_data['num_ingredients'] = choco_data['ingredients'].str.split('-', expand=True)[0]
	choco_data['main_ingredients'] = choco_data['ingredients'].str.split('-', expand=True)[1]
	choco_data['main_ingredients'] = choco_data['main_ingredients'].str.strip(' ')

	# Build one indicator column per comma-separated entry of 'main_ingredients'.
	ingre_encode = choco_data['main_ingredients'].str.get_dummies(sep=',')

	# Next step: concatenate the lecithin column with the main data (1 = contains lecithin)
	# Fold the misspelled / variant taste columns into their canonical column by
	# adding the columns together, then drop the variants that were folded in.
	taste_codo['nutty'] = taste_codo['nut'] + taste_codo['nuts'] + taste_codo['nutty']
	taste_codo['woody'] = taste_codo['woodsy'] + taste_codo['woody']
	taste_codo['earthy'] = taste_codo['earth'] + taste_codo['earthy']
	taste_codo.drop(['nut', 'nuts', 'woodsy', 'earth'], axis=1, inplace=True)

	# making the taste dictionary
	tasty_dict = {}
	tasty_list = list(taste_codo.columns)
	for taste in tasty_list:
	# Keep only the rows whose 'manufacturer' value is 'Soma'.
	soma_choco_data = choco_data[choco_data['manufacturer'].isin(['Soma'])]

	# Dictionary the loop below fills with one counter per bean origin, and the
	# list of bean origins it iterates over.
	bean_dict = {}
	bean_origins = list(soma_choco_data['bean_origin'])
	for origin in bean_origins:
	if origin in bean_dict:
	bean_dict[origin] += 1
	else:
	# Collect every 'cocoa_percent' value found in soma_choco_data.
	cocoa_list = list(soma_choco_data['cocoa_percent'])

	# Count occurrences of each value; keys are the values converted to str.
	# The loop and branch bodies are nested again (the indentation had been
	# flattened, which left the snippet syntactically invalid).
	cocoa_percent_dict = {}
	for cocoa_percent in cocoa_list:
		if str(cocoa_percent) in cocoa_percent_dict:
			cocoa_percent_dict[str(cocoa_percent)] += 1
		else:
			cocoa_percent_dict[str(cocoa_percent)] = 1
	# Keep only the rows whose 'count' is strictly greater than 10 (a count of
	# exactly 10 is removed as well).
	# NOTE(review): count_df is defined elsewhere; presumably it attaches a
	# per-manufacturer 'count' column -- confirm against its definition.
	choco_data_with_sec_count = count_df(choco_data, 'manufacturer')
	choco_data_mod2 = choco_data_with_sec_count[choco_data_with_sec_count['count'] > 10]

	# Mean rating per manufacturer, reshaped into a frame with the columns
	# 'Company' and 'Rating', then sorted best-first and cut to the top 10.
	avg_rating_by_company = choco_data_mod2.groupby('manufacturer')['rating'].mean()
	avg_rating_by_company_df = avg_rating_by_company.rename_axis('Company').reset_index(name='Rating')
	avg_rating_by_company_df_sorted = avg_rating_by_company_df.sort_values(by='Rating', ascending=False).head(10)

	# adding title and plotting the data
	# List the taste columns and prepare the dictionary of totals.
	tastes = list(taste_encode.columns)
	taste_dict = {}

	# Sum each taste column, i.e. how many entries mention that taste. The loop
	# body is nested again (the indentation had been flattened, which left the
	# snippet syntactically invalid).
	for taste in tastes:
		taste_dict[taste] = sum(taste_encode[taste])

	# Sort the totals in descending order and keep 8 of them.
	# NOTE(review): sort_sliced_dict is defined elsewhere; its behaviour is
	# inferred from the keyword arguments -- confirm against its definition.
	taste_dict = sort_sliced_dict(taste_dict, is_reverse=True, item_count=8)
	# Select the 'taste' values of the 'Kokoa Kamili' bars and turn them into one
	# indicator column per ', '-separated taste.
	taste_encode = choco_data[choco_data['bar_name'].isin(['Kokoa Kamili'])]['taste'].str.get_dummies(sep=', ')

	# Merge misspelled / variant columns into a single column per taste.
	taste_encode['nuts'] = taste_encode['nut'] + taste_encode['nuts']
	taste_encode['rich_cocoa'] = taste_encode['rich'] + taste_encode['rich cocoa'] + taste_encode['rich cooa']

	# Drop every column that was folded into another one. 'rich cocoa' is part of
	# the list because its values now live in 'rich_cocoa'; leaving it in place
	# would count the same taste twice once the columns are summed.
	taste_encode.drop(['nut', 'rich', 'rich cocoa', 'rich cooa'], axis=1, inplace=True)