Skip to content

Instantly share code, notes, and snippets.

View isaacarroyov's full-sized avatar
💻
doing all the data stuff in journalism

Isaac Arroyo isaacarroyov

💻
doing all the data stuff in journalism
View GitHub Profile
@isaacarroyov
isaacarroyov / relevant_libraries.py
Created September 18, 2020 18:08
relevant code for the crime_analysis_mx2017 repository
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#GENERAL PLOT SETTINGS
#style sheet
# Matplotlib 3.6 renamed its bundled seaborn style sheets to
# 'seaborn-v0_8-<name>' and later dropped the old names, so use the new name
# when this installation offers it and the legacy name otherwise.
paper_style = (
    'seaborn-v0_8-paper'
    if 'seaborn-v0_8-paper' in plt.style.available
    else 'seaborn-paper'
)
plt.style.use(paper_style)
@isaacarroyov
isaacarroyov / data_preprocessing.py
Last active November 20, 2020 02:50
relevant code for the preprocessing
# Columns of `df` that take part in the analysis.
variables = np.array([
    'HOMICIDES',
    'CAR_THEFT',
    'EXTORTION',
    'STREET_TRANSPORT_THEFT',
])
# Standardize the selected columns with scikit-learn's `scale` (centre on the
# mean, scale to unit variance) and wrap the resulting array in a DataFrame.
from sklearn import preprocessing
df_standardized = pd.DataFrame(preprocessing.scale(df[variables]))
@isaacarroyov
isaacarroyov / sil_method.py
Created November 20, 2020 03:43
application of the silhouette method
# KMeans is imported here so that this snippet does not depend on another
# snippet having imported it first.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
# Build a list of fitted K-Means models, one per cluster count (1 through 14).
kmeans2_sil = [
    KMeans(n_clusters=i, random_state=11, init='k-means++').fit(df_standardized)
    for i in range(1, 15)
]
# Compute the silhouette score of each model (each has a different number of
# clusters). The first model (a single cluster) is skipped by the [1:] slice,
# presumably because a silhouette score needs at least two clusters.
sil_values = [
    silhouette_score(df_standardized, model.labels_, random_state=11)
    for model in kmeans2_sil[1:]
]
#save the silhouette coefficient in an array
@isaacarroyov
isaacarroyov / elbow_method.py
Last active November 20, 2020 04:50
application of elbow method
from sklearn.cluster import KMeans
# Collects the inertia (the "wcss" value) of every fitted model.
wcss = []
# Try every cluster count from 1 through 14.
for i in range(1, 15):
    kmeans = KMeans(init='k-means++', n_clusters=i, random_state=11)
    # fit() hands back the fitted model, so its inertia can be read directly.
    wcss.append(kmeans.fit(df_standardized).inertia_)
@isaacarroyov
isaacarroyov / hello.py
Created March 5, 2021 22:22
inicios de python
# Print a greeting (Spanish for "Hello everyone").
print('Hola a todos')
def degres2decimal(a_degree, a_minute, a_seconds):
    """
    Convert degree / minute / second arrays into decimal degrees.

    Parameters
    ----------
    a_degree, a_minute, a_seconds : array-like with an ``astype`` method
        Element-wise degrees, minutes and seconds (e.g. numpy arrays).
        Degrees and minutes are converted to integers; seconds are converted
        to floats so that fractional seconds are not truncated.

    Returns
    -------
    Array of decimal degrees, one value per input element.

    Notes
    -----
    A coordinate is treated as negative when any of its components is
    negative, and the sign is applied to the whole magnitude. This way
    ``-89° 30' 0"`` yields ``-89.5`` instead of ``-88.5``, while inputs whose
    components all carry the same sign give the same result as a plain sum.
    """
    degree = a_degree.astype(int)
    minute = a_minute.astype(int)
    seconds = a_seconds.astype(float)

    # Minutes and seconds must move the value away from zero, so add the
    # magnitudes first and restore the sign afterwards.
    negative = (degree < 0) | (minute < 0) | (seconds < 0)
    magnitude = abs(degree) + abs(minute) / 60 + abs(seconds) / 3600
    return np.where(negative, -1, 1) * magnitude
#map a duration label to its display colour
def colour_duration(string):
    """
    Return the hex colour associated with a duration label.

    The labels '1 Día', '2 a 3 Días' and '4 a 7 Días' each have their own
    colour; any other value gets '#D62828'.
    """
    palette = (
        ('1 Día', '#003049'),
        ('2 a 3 Días', '#FCBF49'),
        ('4 a 7 Días', '#F77F00'),
    )
    for label, hex_colour in palette:
        if string == label:
            return hex_colour
    # Fallback for every label not listed above.
    return '#D62828'
def style_function(feature):
    """
    Return the style dictionary used to draw ``feature`` on the map.

    The fill colour comes from ``COLOUR_FUNCTION(feature)``; the remaining
    options are fixed. Keys follow Leaflet's path-option names (``fillColor``,
    ``fillOpacity``, ``color``, ``weight``) so that all of them are applied.
    """
    return {
        'fillColor': COLOUR_FUNCTION(feature),
        'fillOpacity': 0.95,
        'color': 'black',  # contour colour
        'weight': 1,  # thickness of the contour
    }
def get_color(feature):
    """
    Return the fill colour for ``feature`` based on its entry in ``map_dict``.

    The feature's 'id' is looked up in ``map_dict``. A missing entry gives
    gray; the values 1, 2 and 3 each give their own colour; any other value
    gives ``None``.
    """
    level = map_dict.get(feature['id'])
    if level is None:
        # MISSING -> gray
        return '#8c8c8c'
    if level == 1:
        return '#DAF7A6'
    if level == 2:
        return '#FFC300'
    if level == 3:
        return '#FF5733'
    # No colour is defined for any other value.
    return None
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap
# Latitude/longitude pairs of `df` as a 2-column array.
coordinate_columns = ['Latitude', 'Longitude']
loc = df[coordinate_columns].to_numpy()
# Centre the base map on the mean latitude and mean longitude of the data.
map_centre = (df['Latitude'].mean(), df['Longitude'].mean())
map_heatmap = folium.Map(location=map_centre, zoom_start=5)
HeatMap(
data = loc,