Skip to content

Instantly share code, notes, and snippets.

# Plot histograms of df's columns: 20 bins each, small tick labels on both axes.
df.hist(
    bins=20,
    figsize=(14, 16),
    xlabelsize=8,
    ylabelsize=8,
)
# Coerce DAILY_STRESS to numeric; values that cannot be parsed become NaN.
df["DAILY_STRESS"] = pd.to_numeric(df["DAILY_STRESS"], errors="coerce")
# Fill the resulting gaps with the column median. The result is assigned back
# instead of calling fillna(inplace=True) on the selected column: that in-place
# call operates on an intermediate object, which pandas warns about and which
# no longer updates df once copy-on-write is in effect.
df["DAILY_STRESS"] = df["DAILY_STRESS"].fillna(df["DAILY_STRESS"].median())
import pandas as pd
import numpy as np
# Load the wellbeing survey CSV, asking pandas to parse Timestamp as dates.
df = pd.read_csv(
    "Wellbeing_and_lifestyle_data_Kaggle.csv",
    parse_dates=["Timestamp"],
)

# Build the datetime index once and derive both calendar columns from it.
timestamps = pd.DatetimeIndex(df["Timestamp"])
df["Year"] = timestamps.year
df["Month"] = timestamps.month

# Preview the first rows (shown when this is the last line of a notebook cell).
df.head()
@taufiqbashori
taufiqbashori / wellbeing_importkaggle.py
Last active December 28, 2021 09:58
wellbeing_importkaggle
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download ydalat/lifestyle-and-wellbeing-data
! unzip lifestyle-and-wellbeing-data
# convert to GeoJSON
# NOTE(review): x is defined outside this snippet; presumably the GeoDataFrame
# built elsewhere in the notebook — confirm.
x_json = x.__geo_interface__
# Placeholder value: replace with a real Mapbox access token before running.
MAPBOX_ACCESSTOKEN = 'INSERT YOUR MAPBOX ACCESS TOKEN HERE'
# Smallest and largest values of the 'pop' column
# (presumably the colour-scale bounds for the map trace — verify).
zmin = x['pop'].min()
zmax = x['pop'].max()
# Set the data for the map
data = go.Choroplethmapbox(
# OPTIONAL: Display using geopandas
# One figure with a single axes, figsize=(10,20).
fig, ax = plt.subplots(1,1, figsize=(10,20))
# Divider for ax; used below to attach the extra axes for the colorbar.
divider = make_axes_locatable(ax)
# Work on a copy so that x itself is left untouched.
tmpx = x.copy()
#To display values
# NOTE(review): this assigns the column to itself and changes nothing;
# possibly a leftover from a removed conversion step — confirm.
tmpx['pop'] = tmpx['pop']
#resize the colorbar
cax = divider.append_axes("left", size="3%", pad=-3)
# Restrict the mapping to one province (DKI JAKARTA), keep only the listed
# columns, and convert every column to the pandas 'string' dtype.
x = (
    dfx[dfx["province"] == 'DKI JAKARTA'][
        ["district", "sub_district", "village", "geometry", "border"]
    ]
    .copy()
    .astype('string')
)

# Parse the WKT text in 'geometry' into a GeoSeries, then wrap the frame
# as a GeoDataFrame.
x['geometry'] = gpd.GeoSeries.from_wkt(x['geometry'])
x = gpd.GeoDataFrame(x)

# One random integer in [0, 3000) per row, purely to illustrate use cases.
x['pop'] = np.random.randint(0, 3000, len(x))
from google.colab import files

# Write dfx to a CSV file, then pass that same file to Colab's download helper.
csv_name = "indoregion_village_mapped.csv"
dfx.to_csv(csv_name)
files.download(csv_name)
dfx_plot = dfx.copy()
from shapely import wkt

# Parse every value of the 'geometry' column with wkt.loads. A value that
# fails to parse contributes None, so geom keeps one entry per row.
geom = []
for g in dfx_plot['geometry']:
    try:
        geom.append(wkt.loads(g))
    except Exception:
        # Best-effort parse: keep going on bad input, but unlike a bare
        # `except:` let KeyboardInterrupt / SystemExit propagate.
        geom.append(None)
# Look up each index label of dfx in border_dict (polygon and multipolygon
# values) and store the result in the 'geometry' column.
dfx["geometry"] = dfx.index.map(border_dict)

# Show the rows whose border text starts with "[[[" (the multipolygon case)
# to confirm that the for loop worked as intended.
multipolygon_rows = dfx["border"].str.startswith("[[[")
dfx[multipolygon_rows]