Kiprono Elijah Koech kipronokoech

## file.py
# Python 3 demonstration:
# converting a dictionary-formatted string to a dictionary
# using json.loads()
# NOTE(review): this excerpt only builds and prints the string; the
# json.loads() call itself is not among the lines shown here.
import json

# JSON-formatted text: one object with three name -> integer entries
string = '{"Kiprono" : 67, "Bob" : 76, "Alice" : 88}'

# print the original (still unparsed) string
print(string)

## file2.py
# Python 3 demonstration:
# converting a dictionary-formatted string to a dictionary
# using the ast module
# NOTE(review): the original header said json.loads(), but this snippet
# imports ast, not json - presumably ast.literal_eval() is the intended
# call; it is not among the lines shown here, so confirm.
import ast

# dictionary-formatted text: three name -> integer entries
string = '{"Kiprono" : 67, "Bob" : 76, "Alice" : 88}'

# print the original (still unparsed) string
print(string)

## runtime.py
import pandas as pd
import numpy as np

# Draw 15000 random integers from -1000 (inclusive) up to 1000 (exclusive);
# np.random.randint never returns the upper bound itself.
col1 = np.random.randint(low=-1000, high=1000, size=15000)

# Dictionary of data: column name -> array of values
data = {"col1": col1}

## groupby.py
# Q1: What is the mean age within each occupation?
# Group the rows of df by the 'occupation' column, select the 'age' column
# of each group and average it.
# NOTE(review): df is not defined in this excerpt; it is assumed to be a
# DataFrame with 'occupation' and 'age' columns.

df.groupby("occupation")["age"].mean()
# NOTE(review): the recorded output below reports dtype int64, whereas mean()
# on integer data normally yields float64 - confirm how it was produced.
#-----------------OUTPUT------------------------
# occupation
# other         43
# technician    24
# writer        23
# Name: age, dtype: int64
#-----------------END OF OUTPUT-----------------

## scraper01.py
# Import necessary packages
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
# Site URL: Wikipedia's list of countries by nominal GDP per capita
url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)_per_capita"

# Make a GET request to fetch the raw HTML content.
# A timeout (in seconds) is passed explicitly: without one, requests waits
# indefinitely if the server never answers, and the script would hang.
html_content = requests.get(url, timeout=30).text

## scraper02.py
# Per the original note, the page holds 3 tables carrying the CSS class
# "wikitable". find_all returns one entry per matching <table> element.
# NOTE(review): soup is not defined in this excerpt; it is assumed to be the
# BeautifulSoup object built from the downloaded HTML.
gdp = soup.find_all("table", class_="wikitable")
print("Number of tables on site: ", len(gdp))

## scraper03.py
# Work on the first matched table only (gdp[0]).
table1 = gdp[0]
# Collect every <tr> row of that table, header row included.
body = table1.find_all("tr")
# Split the rows: index 0 is the header row (it will supply the column
# names); everything after it is a data row.
head, body_rows = body[0], body[1:]

# Next step (not part of this excerpt): iterate through the header row's HTML
# and build a list of clean headings.

## scraper04.py
# Next, loop through the rest of the rows (the data rows in body_rows).
# NOTE(review): the body of the inner loop is cut off in this excerpt; only
# the comments describing it are visible.

#print(body_rows[0])
all_rows = [] # will be a list of lists, one inner list per table row
for row_num in range(len(body_rows)): # one row at a time
    row = [] # this will hold the entries for one row
    for row_item in body_rows[row_num].find_all("td"): # loop through all <td> entries of the row
        # row_item.text gives the entry's text with the HTML tags removed.
        # The following regex is meant to strip \xa0, \n and commas from row_item.text:
        # \xa0 is a non-breaking space (the original note ties it to the flag icon),
        # \n is the newline, and the comma separates thousands in numbers.

## scraper05.py
# Build the table from the scraped pieces: all_rows supplies the data and
# headings supplies the column names.
# NOTE(review): neither all_rows nor headings is fully built in the visible
# excerpts; both are assumed to be complete by this point.
df = pd.DataFrame(all_rows, columns=headings)
# Preview the first rows (the result is only displayed in an interactive
# session such as a notebook).
df.head()

## vectorize.py
# import library to use to vectorize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Vectorization with sklearn - a minimal two-document example
corpus = [
    "Excellent Services by the ABC remit team.Recommend.",
    "Bad Services. Transaction delayed for three days.Don't recommend.",
]
# Learn the vocabulary from the corpus and encode each document against it
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
#print(X) #this is just a matrix with position as tuple and token in that position
	# NOTE(review): everything from here on is a tab-indented repeat of the
	# snippets that appear earlier in this file (the ast-based one is absent).
	# Nested lines have lost their relative indentation in this copy, so it is
	# not runnable as written.
	# Python 3 demonstration:
	# converting a dictionary-formatted string to a dictionary
	# using json.loads()
	import json

	# JSON-formatted text: one object with three name -> integer entries
	string = '{"Kiprono" : 67, "Bob" : 76, "Alice" : 88}'

	# print the original (still unparsed) string
	print(string)
	import pandas as pd
	import numpy as np

	# generate 15000 random integers from -1000 (inclusive) to 1000 (exclusive)
	col1= np.random.randint(-1000,1000,15000)

	# dictionary of data: column name -> array of values
	data = {
	"col1" : col1,
	}
	# Q1 : Determine the average age per occupation.
	# NOTE(review): df is not defined at this point in the file.

	df.groupby('occupation').age.mean()
	#-----------------OUTPUT------------------------
	# occupation
	# other 43
	# technician 24
	# writer 23
	# Name: age, dtype: int64
	#-----------------END OF OUTPUT-----------------
	# Import necessary packages
	from bs4 import BeautifulSoup
	import requests
	import pandas as pd
	import re
	# Site URL
	url="https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)_per_capita"

	# Make a GET request to fetch the raw HTML content
	html_content = requests.get(url).text
	# Per the original note, the site has 3 tables with the class "wikitable".
	# The following line generates a list with the HTML content of each table.
	# NOTE(review): soup is not defined in the visible lines.
	gdp = soup.find_all("table", attrs={"class": "wikitable"})
	print("Number of tables on site: ",len(gdp))
	# Scrape the first table, whose HTML is gdp[0]
	table1 = gdp[0]
	# collect every <tr> row of the table
	body = table1.find_all("tr")
	# Head values (column names) come from the first item of the body list
	head = body[0] # 0th item is the header row
	body_rows = body[1:] # all other items become the data rows

	# Next step: iterate through the head HTML code and make a list of clean headings
	# (that code is not present here). Then loop through the rest of the rows.

	#print(body_rows[0])
	all_rows = [] # will be a list of lists, one inner list per table row
	for row_num in range(len(body_rows)): # one row at a time
	row = [] # this will hold the entries for one row
	for row_item in body_rows[row_num].find_all("td"): # loop through all <td> entries of the row
	# row_item.text gives the entry's text with the HTML tags removed.
	# The following regex is meant to strip \xa0, \n and commas from row_item.text:
	# \xa0 is a non-breaking space (the original note ties it to the flag icon),
	# \n is the newline, and the comma separates thousands in numbers.
	# We can now use the data in all_rows and headings to make a table:
	# all_rows becomes our data and headings the column names
	df = pd.DataFrame(data=all_rows,columns=headings)
	df.head()
	# import library to use to vectorize
	from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

	# Vectorization with sklearn - a minimal two-document example
	corpus = [
	"Excellent Services by the ABC remit team.Recommend.",
	"Bad Services. Transaction delayed for three days.Don't recommend."]
	vectorizer = CountVectorizer()
	X = vectorizer.fit_transform(corpus)
	#print(X) #this is just a matrix with position as tuple and token in that position