focaalvarez

## Pubs Uk.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Both sheets come from the same workbook; the 'name' column is converted
# with str while loading.
workbook_path = 'open_pubs.xlsx'
pubs = pd.read_excel(
    workbook_path,
    sheet_name='open_pubs',
    converters={'name': str},
)
cities = pd.read_excel(workbook_path, sheet_name='population')

# Normalise every pub name to lower case.
lowercase_names = pubs['name'].str.lower()
pubs['name'] = lowercase_names

## Pubs Uk2.py
#count words in names
# split() with no argument collapses runs of whitespace and ignores leading or
# trailing spaces, so a name like "the  red lion " counts 3 words instead of
# being inflated by empty tokens (and an empty name counts 0, not 1).
pubs['word_count'] = pubs['name'].apply(lambda x: len(str(x).split()))

# Histogram of the per-name word counts.
pubs.hist(column=['word_count'], grid=False, bins=10)

# print() already separates its arguments with one space, so the message
# parts carry no padding of their own.
print('The average length of the Pub names is', pubs['word_count'].mean(), 'words.')

## Pubs Uk3.py
#create word cloud
# Configure the cloud first (at most 150 words, white background,
# normalize_plurals switched off), then generate it from `text`.
# NOTE(review): `stopwords` and `text` are not defined in the visible code;
# confirm they are set up before this runs.
cloud_builder = WordCloud(
    max_words=150,
    background_color="white",
    stopwords=stopwords,
    normalize_plurals=False,
)
wordcloud = cloud_builder.generate(text)

# Show the cloud on a 500-dpi figure with the axes hidden.
plt.figure(dpi=500)
plt.imshow(wordcloud, interpolation="bicubic")
plt.axis("off")
plt.show()

# Also write the cloud to an image file.
wordcloud.to_file("pub_wordcloud.png")

## Pubs Uk4.py
#load lists
# Read the four term sheets in a single pass. `sheet_name` is the supported
# keyword (the legacy `sheetname` spelling is rejected by current pandas) and
# matches the read_excel calls used for open_pubs.xlsx. Passing a list of
# sheet names returns a dict of DataFrames keyed by those names.
_term_sheets = pd.read_excel(
    'animals.xlsx',
    sheet_name=['animals', 'colors', 'royal', 'sports'],
    header=None,
)

# Collapse the first column of each sheet into one "a|b|c" string
# (presumably a regex alternation for matching pub names -- confirm at the
# point of use). Blank cells are dropped and values coerced to str so the
# join cannot fail on NaN or numeric entries.
animals = "|".join(_term_sheets['animals'][0].dropna().astype(str))
colors = "|".join(_term_sheets['colors'][0].dropna().astype(str))
royal = "|".join(_term_sheets['royal'][0].dropna().astype(str))
sports = "|".join(_term_sheets['sports'][0].dropna().astype(str))

## Pubs Geo 5.py
# Trace list holding a single Scattermapbox trace: one marker per row of
# `pubs`, positioned by its coordinate columns and labelled with the pub name.
# NOTE(review): `go` is not imported in the visible code (presumably
# plotly.graph_objs) -- confirm the import exists.
# NOTE(review): the list opened here is not closed within this snippet.
data = [
    go.Scattermapbox(
        # NOTE(review): the key is spelled 'latitute' -- confirm it matches
        # the actual column header in the spreadsheet.
        lat=pubs['latitute'],
        lon=pubs['longitude'],
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=1.5
        ),
        # Text attached to each point: the pub's name.
        text=pubs['name'],
    )

## Pubs Geo 6.py
#get the regional Postcode only
# Keep only the part of zip_code that precedes the first space.
zip_parts = pubs['zip_code'].str.split(' ')
pubs['Postcode'] = zip_parts.str[0]

# Count pubs per Postcode and lay the result out as a two-column frame.
postcode_counts = pubs['Postcode'].value_counts()
pubs_by_postcode = (
    postcode_counts
    .rename_axis('Postcode')
    .reset_index(name='Pubs')
)

# Left-join coordinates, town/area and population from `cities` onto the counts.
# NOTE(review): no pubs-per-1,000-people ratio is computed in this snippet.
city_columns = ['Postcode', 'Latitude', 'Longitude', 'Town/Area', 'Population']
pubs_by_postcode = pubs_by_postcode.merge(
    cities[city_columns],
    how='left',
    on='Postcode',
)

## Classifier Comparison.py
#Import and create all models. Tune appropriate parameters for each model
# Bring in the three estimator classes together, then build one configured
# instance of each.
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to depth 4, with a fixed random_state.
tree = DecisionTreeClassifier(random_state=0, max_depth=4)

# Logistic regression with C=10.
logreg = LogisticRegression(C=10)

# k-nearest-neighbours using a single neighbour.
knn = KNeighborsClassifier(n_neighbors=1)


## Classifier Comparison 2.py
#Train Models and get score
# Fit every model on the training split, then record its score on the
# training and test splits (one entry per model, in model_list order).
train_scores, test_scores = [], []
for model in model_list:
    model.fit(X_train, y_train)
    score_on_train = model.score(X_train, y_train)
    score_on_test = model.score(X_test, y_test)
    train_scores.append(score_on_train)
    test_scores.append(score_on_test)

scores_df= pd.DataFrame(
    {'Model Name': ['tree','logreg','knn','naive_b','forest','gbrt','mlp','svc'],

## vlookup1.py
# Add only 1 column (City)
city_lookup = table2[['Person ID', 'City']]
table1_city = table1.merge(city_lookup, on='Person ID', how='left')

# Left-join every column of table2 onto table1
table1_all = table1.merge(table2, on='Person ID', how='left')

## vlookup2.py

# Left-join table2 onto table1, then replace every missing value with 'Unknown'
table1_fill_nans = (
    table1
    .merge(table2, on='Person ID', how='left')
    .fillna('Unknown')
)
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

	pubs=pd.read_excel('open_pubs.xlsx',sheet_name='open_pubs',converters={'name':str})
	cities=pd.read_excel('open_pubs.xlsx',sheet_name='population')

	#transform to lower case and transform to strings
	pubs['name']=pubs['name'].str.lower()
	#count words in names
	# split() with no argument collapses runs of whitespace and ignores leading
	# or trailing spaces, so names are not over-counted by empty tokens.
	pubs['word_count'] = pubs['name'].apply(lambda x: len(str(x).split()))
	pubs.hist(column=['word_count'], grid=False, bins=10)
	# print() already separates its arguments with one space.
	print('The average length of the Pub names is', pubs['word_count'].mean(), 'words.')
	#create word cloud
	wordcloud = WordCloud(max_words=150, background_color="white",stopwords=stopwords,normalize_plurals=False).generate(text)
	plt.figure(dpi=500)
	plt.imshow(wordcloud, interpolation="bicubic")
	plt.axis("off")
	plt.show()
	wordcloud.to_file("pub_wordcloud.png")
	#load lists
	# Read the four term sheets in a single pass. `sheet_name` is the supported
	# keyword (the legacy `sheetname` spelling is rejected by current pandas);
	# passing a list of sheet names returns a dict of DataFrames keyed by name.
	_term_sheets = pd.read_excel(
		'animals.xlsx',
		sheet_name=['animals', 'colors', 'royal', 'sports'],
		header=None,
	)

	# Join each sheet's first column with a plain "|" -- the escaped "\|" form
	# would put a literal backslash into the string. Blank cells are dropped
	# and values coerced to str so the join cannot fail on NaN or numbers.
	animals = "|".join(_term_sheets['animals'][0].dropna().astype(str))
	colors = "|".join(_term_sheets['colors'][0].dropna().astype(str))
	royal = "|".join(_term_sheets['royal'][0].dropna().astype(str))
	sports = "|".join(_term_sheets['sports'][0].dropna().astype(str))
	data = [
	go.Scattermapbox(
	lat=pubs['latitute'],
	lon=pubs['longitude'],
	mode='markers',
	marker=go.scattermapbox.Marker(
	size=1.5
	),
	text=pubs['name'],
	)
	#get the regional Postcode only
	pubs['Postcode']=pubs['zip_code'].str.split(' ').str[0]

	#Create a Grouped DF by Postcode
	pubs_by_postcode=pd.DataFrame(data=pubs['Postcode'].value_counts())
	pubs_by_postcode.reset_index(inplace=True)
	pubs_by_postcode.columns=['Postcode','Pubs']

	#Append Population and Coordinates; calculate Pubs per 1.000 people
	pubs_by_postcode=pubs_by_postcode.merge(cities[['Postcode', 'Latitude', 'Longitude','Town/Area','Population']],how='left',on='Postcode')
	#Import and create all models. Tune appropriate parameters for each model
	from sklearn.tree import DecisionTreeClassifier
	tree = DecisionTreeClassifier(max_depth=4,random_state=0)

	from sklearn.linear_model import LogisticRegression
	logreg = LogisticRegression(C=10)

	from sklearn.neighbors import KNeighborsClassifier
	knn = KNeighborsClassifier(n_neighbors=1)
	#Train Models and get score
	train_scores=[]
	test_scores=[]
	for model in model_list:
	model.fit(X_train, y_train)
	train_scores.append(model.score(X_train, y_train))
	test_scores.append(model.score(X_test, y_test))

	scores_df= pd.DataFrame(
	{'Model Name': ['tree','logreg','knn','naive_b','forest','gbrt','mlp','svc'],
	# Add only 1 column (City)
	table1_city=table1.merge(table2[['Person ID','City']],how='left',on='Person ID')

	#Add all columns of table 2 (City and language)
	table1_all=table1.merge(table2,how='left',on='Person ID')

	#One liner to fill Nans
	table1_fill_nans=table1.merge(table2,how='left',on='Person ID').fillna('Unknown')