Skip to content

Instantly share code, notes, and snippets.

import geopandas
from matplotlib import pyplot as plt
import pandas as pd
#1__________________________________________________________________________________________________________
# We Upload the Subregional Map of Catalonia
gdf_comarcas = geopandas.read_file('Comarcas_CAT.json')
#2__________________________________________________________________________________________________________
# We Upload a csv file containing what Subregions Belong to each Supraregion.
#4__________________________________________________________________________________________________________
# A new dataframe is generated containing all 9 Touristic Brands name, polygons and codes.
data = {'Marca Turistica': [
'Costa Brava', 'Costa Daurada', 'Barcelona', "Terres de l'Ebre",
"Terres de Lleida", "Pirineus", "Paisatges de Barcelona",
"Costa Barcelona", "Vall d'Aran"
],
'geometry': [
boundary_costabrava, boundary_costadaurada, boundary_barcelona,
boundary_ebre, boundary_lleida, boundary_pirineus,
import geopandas
from matplotlib import pyplot as plt
import pandas as pd
#1__________________________________________________________________________________________________________
# We Upload the Subregional Map of Catalonia
gdf_comarcas = geopandas.read_file('Comarcas_CAT.json')
@rfeers
rfeers / Medium_Geopandas_2.py
Created July 22, 2022 13:19
Medium_Geopandas_2.py
#2__________________________________________________________________________________________________________
# We Upload a csv file containing what Subregions Belong to each Supraregion.
mturistiques = pd.read_csv('Mturistiques.csv',delimiter=';')
#We perform a left join to obtain a final DataFrame containing each Subregion, its corresponding Polygon
#and what Supraregion does it belong.
df = gdf_comarcas.merge(mturistiques, left_on='Subregion_Name', right_on = 'Subregion', how='left')
@rfeers
rfeers / Medium_Geopandas_3.py
Created July 22, 2022 13:20
Medium_Geopandas_3.py
#3__________________________________________________________________________________________________________
# To filter all supraregions, a function is defined.
def boundary_generator(df,string):
polygons = gdf.loc[(gdf['Marca Turística'] == string)]
polygons = polygons['geometry'].unary_union
return polygons
# Costa Brava
boundary_costabrava = boundary_generator(df,'Costa Brava')
@rfeers
rfeers / Medium_Geopandas_4.py
Created July 22, 2022 13:21
Medium_Geopandas_4.py
#4__________________________________________________________________________________________________________
# A new dataframe is generated containing all 9 Touristic Brands name, polygons and codes.
data = {'Marca Turistica': [
'Costa Brava', 'Costa Daurada', 'Barcelona', "Terres de l'Ebre",
"Terres de Lleida", "Pirineus", "Paisatges de Barcelona",
"Costa Barcelona", "Vall d'Aran"
],
'geometry': [
boundary_costabrava, boundary_costadaurada, boundary_barcelona,
@rfeers
rfeers / Medium_PandaSQL_Example1.py
Last active August 18, 2022 09:29
Medium_PandaSQL_Example1.py
#We import pandasql
import pandasql
from pandasql import sqldf, load_meat, load_births
pysqldf = lambda q: sqldf(q, globals())
#We define our dataframes (sampels from pandasql)
meats = load_meat()
meats
births = load_births()
@rfeers
rfeers / Medium_PandaSQL_Example2.py
Last active August 19, 2022 11:45
Medium_PandaSQL_Example2.py
#We perform the left join
pysqldf("""
SELECT
*,
CASE WHEN total_births IS NULL THEN 'No Data'
WHEN total_births >= 4000000 THEN 'High Birth Rate'
WHEN total_births < 4000000 THEN 'Low Birth Rate'
END AS births_text
FROM meat_births_merged
@rfeers
rfeers / Medium_SQL_modular_1.sql
Last active September 24, 2022 10:48
First Modular Query
SELECT DISTINCT
neighbourhood,
COUNT(DISTINCT id) AS num_apartments,
COUNT(DISTINCT host_id) AS num_hosts,
ROUND(CAST(COUNT(DISTINCT id) AS FLOAT)/COUNT(DISTINCT host_id),2) AS mean_apartments_by_host,
ROUND(AVG(price),2) AS AVG_price,
ROUND(AVG(review_cleanliness),2) AS AVG_cleanliness,
ROUND(AVG(review_location),2) AS AVG_location
FROM bcn_airbnb_dataset
GROUP BY 1
SELECT DISTINCT
COUNT(DISTINCT id) AS num_apartments,
COUNT(DISTINCT host_id) AS num_hosts,
ROUND(CAST(COUNT(DISTINCT id) AS FLOAT)/COUNT(DISTINCT host_id),2) AS mean_apartments_by_host,
ROUND(AVG(price),2) AS AVG_price,
ROUND(AVG(review_cleanliness),2) AS AVG_cleanliness,
ROUND(AVG(review_location),2) AS AVG_location
FROM bcn_airbnb_dataset
ORDER BY 1 DESC