Skip to content

Instantly share code, notes, and snippets.

@aneury1
Last active February 27, 2023 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aneury1/a8615d5826c6f90da62ff4bdc41521d8 to your computer and use it in GitHub Desktop.
Save aneury1/a8615d5826c6f90da62ff4bdc41521d8 to your computer and use it in GitHub Desktop.
import fiona
import numpy
import matplotlib.pyplot as plotter
import folium
import geopandas
import pyproj
from sklearn import linear_model
from shapely.geometry import Point, Polygon
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import OPTICS, cluster_optics_dbscan
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import datatable as dt
from sklearn.linear_model import LinearRegression
# --- Data loading ------------------------------------------------------------
# NOTE(review): absolute, machine-specific Windows path. The trailing separator
# matters because the file name is appended by plain string concatenation.
DATASET_FOLDER = "C:\\Users\\Aneury\\Desktop\\TanqueJonathan\\Lucy\\Comunidades_Vulnerables_Nacional_2019_LucyMonicaLiriano\\"
SHAPEFILE = "Comunidades_Vulnerables_Nacional_2019.shp"

# Read the shapefile once. `mapa` keeps the complete layer (it is drawn at the
# end of the script); `geo_shape` starts from the same data and is narrowed
# below. The boolean `.loc[]` selection builds a new frame, so `mapa` itself is
# left unfiltered.
mapa = geopandas.read_file(DATASET_FOLDER + SHAPEFILE)
geo_shape = mapa

# Keep only the rows whose MUNICIPIO column equals "SANTO DOMINGO ESTE".
condition = geo_shape["MUNICIPIO"] == "SANTO DOMINGO ESTE"
geo_shape = geo_shape.loc[condition]

# --- "Causa" column -> numeric feature ----------------------------------------
test = dt.Frame(geo_shape['Causa'])
# Label -> weight table. It is currently NOT applied: the vectorised function
# below uses the length of each string as its numeric value instead.
mapping = {
    "DESBORDAMIENTO DEL RIO PUEBLO VIEJO": 1,
    "PRECIPITACIONES FUERTES": 2,
    "OBSTRUCCION DEL DRENAJE PLUVIAL": 3,
    "DESBORDAMIENTO DEL CANAL CAÑEO Y AGUAS QUE VIENEN DE LA PARTE NORTE DE LA CIUDAD": 4,
}
# Pull the single column out of the datatable frame as a plain Python list.
col_str = test[:, "Causa"].to_list()[0]
col_np = np.array(col_str)
# Empty strings are replaced by the placeholder "0". Because the value used
# downstream is the string length, such rows end up with the value 1, not 0.
col_np = np.where(col_np != "", col_np, "0")
# Element-wise string length.
# NOTE(review): a missing (None) entry would make len() raise - confirm the
# column has no nulls for this municipality.
vfunc = np.vectorize(len)
col_weight = vfunc(col_np)

# --- "Vulnerable" column -> numeric feature -----------------------------------
# `geo_shape` is already restricted to the municipality, so this second filter
# selects every remaining row; `geo_shape2` is kept as a separate name.
condition2 = geo_shape["MUNICIPIO"] == "SANTO DOMINGO ESTE"
geo_shape2 = geo_shape.loc[condition2]
test2 = dt.Frame(geo_shape2['Vulnerable'])
# Label -> weight table, also currently unused (see `mapping` above).
mapping2 = {"INUNDACION": 1}
col_str2 = test2[:, "Vulnerable"].to_list()[0]
col_np2 = np.array(col_str2)
# Same placeholder substitution as for "Causa".
col_np2 = np.where(col_np2 != "", col_np2, "0")
print(col_np2)
# Element-wise string length, as for "Causa".
vfunc2 = np.vectorize(len)
col_weight2 = vfunc2(col_np2)

# Feature vectors consumed by the analysis functions defined below.
ID = col_weight2
causa = col_weight

# Each distinct "Causa" label mapped to its 1-based rank in sorted order.
LUCY_TRANSFORM = {
    label: rank
    for rank, label in enumerate(sorted(set(geo_shape['Causa'])), start=1)
}
def AlgorithKMean(n_clusters=3, random_state=None):
    """Cluster the (causa, ID) pairs with k-means and show a scatter plot.

    The module-level arrays ``causa`` and ``ID`` are stacked column-wise into a
    two-feature matrix, k-means is fitted on it, and the points are drawn
    coloured by cluster label with the cluster centres overlaid as red crosses.

    Args:
        n_clusters: Number of clusters to fit. Defaults to 3, the value that
            was previously hard-coded.
        random_state: Passed straight to ``KMeans``. The default ``None``
            leaves the initialisation unseeded, as before; pass an integer to
            get reproducible cluster assignments.

    Returns:
        None. The result is only displayed through ``plt.show()``.
    """
    # One row per sample: column 0 is `causa`, column 1 is `ID`.
    X = np.column_stack((causa, ID))

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(X)

    centers = kmeans.cluster_centers_
    labels = kmeans.labels_

    # Samples coloured by assigned cluster, then the centres on top.
    plt.scatter(X[:, 0], X[:, 1], c=labels)
    plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, linewidths=3, color='r')
    plt.show()
def AlgorithmOPTICS():
    """Run OPTICS on synthetic 2-D data and show a four-panel comparison.

    The data are four normally distributed blobs of 1250 points each, drawn
    after seeding NumPy's global random generator with 0. The figure contains
    the reachability plot, the clustering found by OPTICS itself, and the two
    DBSCAN-style labelings extracted from the same fit at eps=0.5 and eps=2.

    Takes no arguments and returns None; the figure is displayed with
    ``plt.show()``. Note that it does not use the shapefile data.
    """
    # Synthetic sample: (centre, spread) for each blob, drawn in this order.
    np.random.seed(0)
    samples_per_blob = 1250
    blob_specs = (
        ([-5, -2], 0.8),
        ([4, -1], 0.1),
        ([1, -2], 0.2),
        ([-2, 3], 0.3),
    )
    blobs = [
        center + spread * np.random.randn(samples_per_blob, 2)
        for center, spread in blob_specs
    ]
    X = np.vstack(blobs)

    optics = OPTICS(min_samples=45, xi=0.05, min_cluster_size=0.05)
    optics.fit(X)

    def dbscan_cut(eps):
        # Labels extracted from the fitted OPTICS result at a fixed eps.
        return cluster_optics_dbscan(
            reachability=optics.reachability_,
            core_distances=optics.core_distances_,
            ordering=optics.ordering_,
            eps=eps,
        )

    labels_eps_050 = dbscan_cut(0.5)
    labels_eps_200 = dbscan_cut(2)

    # Reachability values and labels rearranged into the OPTICS ordering.
    order = optics.ordering_
    positions = np.arange(len(X))
    ordered_reach = optics.reachability_[order]
    ordered_labels = optics.labels_[order]

    # Layout: one wide panel on top, three panels underneath.
    plt.figure(figsize=(10, 7))
    grid = gridspec.GridSpec(2, 3)
    ax_reach = plt.subplot(grid[0, :])
    ax_optics = plt.subplot(grid[1, 0])
    ax_cut_050 = plt.subplot(grid[1, 1])
    ax_cut_200 = plt.subplot(grid[1, 2])

    # Reachability plot: one colour per cluster id 0..4, noise (-1) in black,
    # plus horizontal guide lines at the two eps cut levels.
    for cluster_id, fmt in enumerate(["g.", "r.", "b.", "y.", "c."]):
        in_cluster = ordered_labels == cluster_id
        ax_reach.plot(positions[in_cluster], ordered_reach[in_cluster], fmt, alpha=0.3)
    is_noise = ordered_labels == -1
    ax_reach.plot(positions[is_noise], ordered_reach[is_noise], "k.", alpha=0.3)
    ax_reach.plot(positions, np.full_like(positions, 2.0, dtype=float), "k-", alpha=0.5)
    ax_reach.plot(positions, np.full_like(positions, 0.5, dtype=float), "k-.", alpha=0.5)
    ax_reach.set_ylabel("Reachability (epsilon distance)")
    ax_reach.set_title("Reachability Plot")

    def draw_clusters(ax, point_labels, formats, title):
        # Scatter each cluster id 0..len(formats)-1 in its own colour, then
        # the noise points (label -1) as faint black plus signs.
        for cluster_id, fmt in enumerate(formats):
            members = X[point_labels == cluster_id]
            ax.plot(members[:, 0], members[:, 1], fmt, alpha=0.3)
        noise_mask = point_labels == -1
        ax.plot(X[noise_mask, 0], X[noise_mask, 1], "k+", alpha=0.1)
        ax.set_title(title)

    draw_clusters(
        ax_optics,
        optics.labels_,
        ["g.", "r.", "b.", "y.", "c."],
        "Automatic Clustering\nOPTICS",
    )
    draw_clusters(
        ax_cut_050,
        labels_eps_050,
        ["g.", "r.", "b.", "c."],
        "Clustering at 0.5 epsilon cut\nDBSCAN",
    )
    draw_clusters(
        ax_cut_200,
        labels_eps_200,
        ["g.", "m.", "y.", "c."],
        "Clustering at 2.0 epsilon cut\nDBSCAN",
    )

    plt.tight_layout()
    plt.show()
def AlgorithmLinearRegresion():
    """Fit a one-feature linear regression and plot it over the data.

    The module-level array ``col_weight`` (derived from the "Causa" column) is
    the predictor and ``col_weight2`` (derived from the "Vulnerable" column) is
    the target. The samples are drawn as a scatter plot with the fitted line
    on top.

    Takes no arguments and returns None; the figure is displayed with
    ``plt.show()``.
    """
    x = col_weight
    y = col_weight2

    # The estimator expects a 2-D feature matrix, so turn x into one column.
    X = x.reshape(-1, 1)

    model = LinearRegression()
    model.fit(X, y)

    plt.scatter(x, y)
    plt.plot(x, model.predict(X))
    plt.xlabel('Causa')
    plt.ylabel('Vulnerable')
    plt.title('Regresion Linear utilizando(Causa,Vulnerable)')
    plt.show()
# Run the three analyses in order; each one ends by calling plt.show() on its
# own figure.
AlgorithmOPTICS()
AlgorithKMean()
AlgorithmLinearRegresion()

# NOTE(review): the value returned by explore() is discarded here, so this
# presumably only has a visible effect in a notebook where it is the cell's
# last expression - confirm whether the result should be kept or saved.
mapa.explore()

# Static plot of the full layer. Without an explicit show() the figure is
# never displayed when the file is run as a plain script.
mapa.plot()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment