Josep Ferrer Sánchez rfeers

## Geopandas.py
import geopandas
from matplotlib import pyplot as plt
import pandas as pd

#1__________________________________________________________________________________________________________
# We Upload the Subregional Map of Catalonia
gdf_comarcas = geopandas.read_file('Comarcas_CAT.json')

#2__________________________________________________________________________________________________________
# We Upload a csv file containing what Subregions Belong to each Supraregion.

## Merging_Supraregions.py
#4__________________________________________________________________________________________________________
# A new dataframe is generated containing all 9 Touristic Brands name, polygons and codes.
data = {'Marca Turistica': [
                            'Costa Brava', 'Costa Daurada', 'Barcelona', "Terres de l'Ebre",
                            "Terres de Lleida", "Pirineus", "Paisatges de Barcelona",
                            "Costa Barcelona", "Vall d'Aran"
                           ],
        'geometry': [
                     boundary_costabrava, boundary_costadaurada, boundary_barcelona,
                     boundary_ebre, boundary_lleida, boundary_pirineus,

## gist:065159db3959ea29d52b7c0e48749dee
import geopandas
from matplotlib import pyplot as plt
import pandas as pd

#1__________________________________________________________________________________________________________
# We Upload the Subregional Map of Catalonia
gdf_comarcas = geopandas.read_file('Comarcas_CAT.json')

## Medium_Geopandas_2.py
#2__________________________________________________________________________________________________________
# We Upload a csv file containing what Subregions Belong to each Supraregion.
mturistiques = pd.read_csv('Mturistiques.csv',delimiter=';')
#We perform a left join to obtain a final DataFrame containing each Subregion, its corresponding Polygon
#and what Supraregion does it belong.
df = gdf_comarcas.merge(mturistiques, left_on='Subregion_Name', right_on = 'Subregion',  how='left')

## Medium_Geopandas_3.py
#3__________________________________________________________________________________________________________
# To filter all supraregions, a function is defined.
def boundary_generator(df,string):
    polygons = gdf.loc[(gdf['Marca Turística'] == string)]
    polygons = polygons['geometry'].unary_union
    return polygons


# Costa Brava
boundary_costabrava = boundary_generator(df,'Costa Brava')

## Medium_Geopandas_4.py

#4__________________________________________________________________________________________________________
# A new dataframe is generated containing all 9 Touristic Brands name, polygons and codes.
data = {'Marca Turistica': [
                            'Costa Brava', 'Costa Daurada', 'Barcelona', "Terres de l'Ebre",
                            "Terres de Lleida", "Pirineus", "Paisatges de Barcelona",
                            "Costa Barcelona", "Vall d'Aran"
                           ],
        'geometry': [
                     boundary_costabrava, boundary_costadaurada, boundary_barcelona,

## Medium_PandaSQL_Example1.py
#We import pandasql
import pandasql
from pandasql import sqldf, load_meat, load_births
pysqldf = lambda q: sqldf(q, globals())


#We define our dataframes (sampels from pandasql)
meats  = load_meat()
meats
births  = load_births()

## Medium_PandaSQL_Example2.py
#We perform the left join
pysqldf("""
            SELECT

            *,
            CASE WHEN total_births IS NULL      THEN 'No Data'
                 WHEN total_births >= 4000000   THEN 'High Birth Rate'
                 WHEN total_births <  4000000   THEN 'Low Birth Rate'
            END AS births_text
            FROM meat_births_merged

## Medium_SQL_modular_1.sql
SELECT DISTINCT
       neighbourhood,
       COUNT(DISTINCT id)                                                  AS num_apartments,
       COUNT(DISTINCT host_id)                                             AS num_hosts,
       ROUND(CAST(COUNT(DISTINCT id) AS FLOAT)/COUNT(DISTINCT host_id),2)  AS mean_apartments_by_host,
       ROUND(AVG(price),2)                                                 AS AVG_price,
       ROUND(AVG(review_cleanliness),2)                                    AS AVG_cleanliness,
       ROUND(AVG(review_location),2)                                       AS AVG_location
FROM bcn_airbnb_dataset
GROUP BY 1

## Medium_SQL_modular_2.sql
SELECT DISTINCT
       COUNT(DISTINCT id)                                                  AS num_apartments,
       COUNT(DISTINCT host_id)                                             AS num_hosts,
       ROUND(CAST(COUNT(DISTINCT id) AS FLOAT)/COUNT(DISTINCT host_id),2)  AS mean_apartments_by_host,
       ROUND(AVG(price),2)                                                 AS AVG_price,
       ROUND(AVG(review_cleanliness),2)                                    AS AVG_cleanliness,
       ROUND(AVG(review_location),2)                                       AS AVG_location
FROM bcn_airbnb_dataset
ORDER BY 1 DESC
	import geopandas
	from matplotlib import pyplot as plt
	import pandas as pd

	#1__________________________________________________________________________________________________________
	# We Upload the Subregional Map of Catalonia
	gdf_comarcas = geopandas.read_file('Comarcas_CAT.json')

	#2__________________________________________________________________________________________________________
	# We Upload a csv file containing what Subregions Belong to each Supraregion.
	#4__________________________________________________________________________________________________________
	# A new dataframe is generated containing all 9 Touristic Brands name, polygons and codes.
	data = {'Marca Turistica': [
	'Costa Brava', 'Costa Daurada', 'Barcelona', "Terres de l'Ebre",
	"Terres de Lleida", "Pirineus", "Paisatges de Barcelona",
	"Costa Barcelona", "Vall d'Aran"
	],
	'geometry': [
	boundary_costabrava, boundary_costadaurada, boundary_barcelona,
	boundary_ebre, boundary_lleida, boundary_pirineus,
	#2__________________________________________________________________________________________________________
	# We Upload a csv file containing what Subregions Belong to each Supraregion.
	mturistiques = pd.read_csv('Mturistiques.csv',delimiter=';')
	#We perform a left join to obtain a final DataFrame containing each Subregion, its corresponding Polygon
	#and what Supraregion does it belong.
	df = gdf_comarcas.merge(mturistiques, left_on='Subregion_Name', right_on = 'Subregion', how='left')
	#3__________________________________________________________________________________________________________
	# To filter all supraregions, a function is defined.
	def boundary_generator(df,string):
	polygons = gdf.loc[(gdf['Marca Turística'] == string)]
	polygons = polygons['geometry'].unary_union
	return polygons


	# Costa Brava
	boundary_costabrava = boundary_generator(df,'Costa Brava')
	#We import pandasql
	import pandasql
	from pandasql import sqldf, load_meat, load_births
	pysqldf = lambda q: sqldf(q, globals())


	#We define our dataframes (sampels from pandasql)
	meats = load_meat()
	meats
	births = load_births()
	#We perform the left join
	pysqldf("""
	SELECT

	*,
	CASE WHEN total_births IS NULL THEN 'No Data'
	WHEN total_births >= 4000000 THEN 'High Birth Rate'
	WHEN total_births < 4000000 THEN 'Low Birth Rate'
	END AS births_text
	FROM meat_births_merged
	SELECT DISTINCT
	neighbourhood,
	COUNT(DISTINCT id) AS num_apartments,
	COUNT(DISTINCT host_id) AS num_hosts,
	ROUND(CAST(COUNT(DISTINCT id) AS FLOAT)/COUNT(DISTINCT host_id),2) AS mean_apartments_by_host,
	ROUND(AVG(price),2) AS AVG_price,
	ROUND(AVG(review_cleanliness),2) AS AVG_cleanliness,
	ROUND(AVG(review_location),2) AS AVG_location
	FROM bcn_airbnb_dataset
	GROUP BY 1