# aneury1/lcf.py

## lcf.py
import fiona
import numpy
import matplotlib.pyplot as plotter
import folium
import geopandas
import pyproj
from sklearn import linear_model
from shapely.geometry import Point, Polygon
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import OPTICS, cluster_optics_dbscan
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import datatable as dt
from sklearn.linear_model import LinearRegression


# Absolute, machine-specific location of the vulnerable-communities shapefile.
DATASET_FOLDER = "C:\\Users\\Aneury\\Desktop\\TanqueJonathan\\Lucy\\Comunidades_Vulnerables_Nacional_2019_LucyMonicaLiriano\\"
SHAPEFILE = "Comunidades_Vulnerables_Nacional_2019.shp"

# Read the shapefile once. `mapa` keeps every row (it is rendered at the end
# of the script); `geo_shape` is the subset analysed below.
mapa = geopandas.read_file(DATASET_FOLDER + SHAPEFILE)

# Boolean mask: rows whose MUNICIPIO column equals "SANTO DOMINGO ESTE".
condition = mapa["MUNICIPIO"] == "SANTO DOMINGO ESTE"
geo_shape = mapa.loc[condition]

# ---- "Causa" column -> one integer weight per row ---------------------------
# One-column datatable Frame holding the "Causa" values of the subset.
test = dt.Frame(geo_shape['Causa'])

# Hand-assigned numeric codes for known causes. Currently unused: the weights
# computed below are string lengths, not these codes.
mapping = {"DESBORDAMIENTO DEL RIO PUEBLO VIEJO": 1, "PRECIPITACIONES FUERTES": 2,
           "OBSTRUCCION DEL DRENAJE PLUVIAL": 3,
           "DESBORDAMIENTO DEL CANAL CAÑEO Y AGUAS QUE VIENEN DE LA PARTE NORTE DE LA CIUDAD": 4}

# Pull the column out as a plain Python list, then as a NumPy array.
col_str = test[:, "Causa"].to_list()[0]
col_np = np.array(col_str)

# Empty strings are replaced by the placeholder "0".
# NOTE(review): the placeholder has length 1, so an empty cell ends up with
# weight 1 rather than 0 -- confirm that this is intended.
col_np = np.where(col_np != "", col_np, "0")

# Weight of a row = number of characters of its text. `otypes` is stated
# explicitly so np.vectorize does not have to probe the first element to infer
# the output type (it refuses size-0 input when it has to infer).
vfunc = np.vectorize(len, otypes=[int])
col_weight = vfunc(col_np)

# ---- "Vulnerable" column -> one integer weight per row ----------------------
# This second filter is applied to the already-filtered frame, so it selects
# every row; it is kept so `condition2` / `geo_shape2` stay defined.
condition2 = geo_shape["MUNICIPIO"] == "SANTO DOMINGO ESTE"
geo_shape2 = geo_shape.loc[condition2]

test2 = dt.Frame(geo_shape['Vulnerable'])

# Numeric code for the known vulnerability type (currently unused, see above).
mapping2 = {"INUNDACION": 1}

col_str2 = test2[:, "Vulnerable"].to_list()[0]
col_np2 = np.array(col_str2)

# Same placeholder substitution as for "Causa".
col_np2 = np.where(col_np2 != "", col_np2, "0")
print(col_np2)

vfunc2 = np.vectorize(len, otypes=[int])
col_weight2 = vfunc2(col_np2)

# Aliases read by AlgorithKMean().
ID = col_weight2
causa = col_weight

# 1-based rank of every distinct "Causa" value in sorted order (currently unused).
LUCY_TRANSFORM = {
    cause: rank
    for rank, cause in enumerate(sorted(set(geo_shape['Causa'])), start=1)
}

def AlgorithKMean():
    """Cluster the (causa, ID) weight pairs with k-means and plot the result.

    Reads the module-level arrays ``causa`` and ``ID``, stacks them into a
    two-column matrix, fits a 3-cluster ``KMeans`` model on it and shows a
    scatter plot of the samples coloured by cluster label, with the cluster
    centres drawn as red crosses.
    """
    # One row per sample: column 0 comes from `causa`, column 1 from `ID`.
    samples = np.column_stack((causa, ID))

    # fit() returns the estimator itself, so construction and fitting chain.
    model = KMeans(n_clusters=3).fit(samples)
    centroids = model.cluster_centers_

    # Samples coloured by assigned cluster, then the centres on top.
    first_axis, second_axis = samples[:, 0], samples[:, 1]
    plt.scatter(first_axis, second_axis, c=model.labels_)
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='r')
    plt.show()

def AlgorithmOPTICS():
    """Run OPTICS on synthetic 2-D data and plot four diagnostic panels.

    The input is four Gaussian blobs of 1250 points each, drawn from the
    global NumPy random generator after seeding it with 0; the shapefile data
    loaded by this module is not used here. The figure has the reachability
    plot on the top row and, below it, the OPTICS labelling followed by the
    DBSCAN-style labellings extracted from the same fit at eps=0.5 and eps=2.
    """
    np.random.seed(0)
    points_per_blob = 1250

    # (centre, spread) of every blob; the tuple order fixes the RNG draw order.
    blob_specs = (
        ([-5, -2], 0.8),
        ([4, -1], 0.1),
        ([1, -2], 0.2),
        ([-2, 3], 0.3),
    )
    X = np.vstack([
        centre + spread * np.random.randn(points_per_blob, 2)
        for centre, spread in blob_specs
    ])

    clust = OPTICS(min_samples=45, xi=0.05, min_cluster_size=0.05)
    clust.fit(X)

    def dbscan_labels(eps):
        # DBSCAN-equivalent labels cut from the fitted reachability data.
        return cluster_optics_dbscan(
            reachability=clust.reachability_,
            core_distances=clust.core_distances_,
            ordering=clust.ordering_,
            eps=eps,
        )

    labels_050 = dbscan_labels(0.5)
    labels_200 = dbscan_labels(2)

    # Reachability values and labels rearranged into the OPTICS ordering.
    order = clust.ordering_
    positions = np.arange(len(X))
    ordered_reach = clust.reachability_[order]
    ordered_labels = clust.labels_[order]

    plt.figure(figsize=(10, 7))
    grid = gridspec.GridSpec(2, 3)
    ax1 = plt.subplot(grid[0, :])
    ax2, ax3, ax4 = (plt.subplot(grid[1, col]) for col in range(3))

    # Reachability plot: one marker style per cluster label 0..4, noise in black.
    for klass, style in enumerate(["g.", "r.", "b.", "y.", "c."]):
        member = ordered_labels == klass
        ax1.plot(positions[member], ordered_reach[member], style, alpha=0.3)
    noise = ordered_labels == -1
    ax1.plot(positions[noise], ordered_reach[noise], "k.", alpha=0.3)
    # Horizontal guides at the two eps cuts used for the DBSCAN panels.
    for level, line_style in ((2.0, "k-"), (0.5, "k-.")):
        ax1.plot(positions, np.full_like(positions, level, dtype=float), line_style, alpha=0.5)
    ax1.set_ylabel("Reachability (epsilon distance)")
    ax1.set_title("Reachability Plot")

    def draw_clusters(ax, assignment, styles, title):
        # Plot label k with styles[k]; points labelled -1 are drawn as faint '+'.
        for klass, style in enumerate(styles):
            members = X[assignment == klass]
            ax.plot(members[:, 0], members[:, 1], style, alpha=0.3)
        outliers = assignment == -1
        ax.plot(X[outliers, 0], X[outliers, 1], "k+", alpha=0.1)
        ax.set_title(title)

    draw_clusters(ax2, clust.labels_, ["g.", "r.", "b.", "y.", "c."],
                  "Automatic Clustering\nOPTICS")
    draw_clusters(ax3, labels_050, ["g.", "r.", "b.", "c."],
                  "Clustering at 0.5 epsilon cut\nDBSCAN")
    draw_clusters(ax4, labels_200, ["g.", "m.", "y.", "c."],
                  "Clustering at 2.0 epsilon cut\nDBSCAN")

    plt.tight_layout()
    plt.show()


def AlgorithmLinearRegresion():
    """Fit a straight line to (col_weight, col_weight2) and plot it.

    Reads the module-level arrays ``col_weight`` (predictor, labelled
    'Causa' on the plot) and ``col_weight2`` (target, labelled 'Vulnerable'),
    fits an ordinary ``LinearRegression`` and shows the samples as a scatter
    plot with the fitted values drawn over them.
    """
    x = col_weight
    y = col_weight2

    # The estimator takes a 2-D feature matrix, hence the single-column reshape.
    X = x.reshape(-1, 1)
    model = LinearRegression()
    model.fit(X, y)

    # The former one-off prediction for x=6 was never used and has been dropped.
    plt.scatter(x, y)
    plt.plot(x, model.predict(X))
    plt.xlabel('Causa')
    plt.ylabel('Vulnerable')
    plt.title('Regresion Linear utilizando(Causa,Vulnerable)')
    plt.show()


# Run the three analyses in sequence; each one ends with its own plt.show().
AlgorithmOPTICS()
AlgorithKMean()
AlgorithmLinearRegresion()

# NOTE(review): the value returned by explore() is discarded here; presumably
# it is only rendered when it is the last expression of a notebook cell.
mapa.explore()

# plot() only draws onto a new Matplotlib figure; show() is required for that
# figure to appear when this file is run as a plain script.
mapa.plot()
plt.show()
	import fiona
	import numpy
	import matplotlib.pyplot as plotter
	import folium
	import geopandas
	import pyproj
	from sklearn import linear_model
	from shapely.geometry import Point, Polygon
	from sklearn.cluster import KMeans
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.cluster import OPTICS, cluster_optics_dbscan
	import matplotlib.gridspec as gridspec
	import matplotlib.pyplot as plt
	import numpy as np
	import datatable as dt
	from sklearn.linear_model import LinearRegression


	# Absolute, machine-specific location of the vulnerable-communities shapefile.
	DATASET_FOLDER = "C:\\Users\\Aneury\\Desktop\\TanqueJonathan\\Lucy\\Comunidades_Vulnerables_Nacional_2019_LucyMonicaLiriano\\"
	SHAPEFILE = "Comunidades_Vulnerables_Nacional_2019.shp"

	# Read the shapefile once. `mapa` keeps every row (it is rendered at the end
	# of the script); `geo_shape` is the subset analysed below.
	mapa = geopandas.read_file(DATASET_FOLDER + SHAPEFILE)

	# Boolean mask: rows whose MUNICIPIO column equals "SANTO DOMINGO ESTE".
	condition = mapa["MUNICIPIO"] == "SANTO DOMINGO ESTE"
	geo_shape = mapa.loc[condition]

	# ---- "Causa" column -> one integer weight per row ---------------------------
	# One-column datatable Frame holding the "Causa" values of the subset.
	test = dt.Frame(geo_shape['Causa'])

	# Hand-assigned numeric codes for known causes. Currently unused: the weights
	# computed below are string lengths, not these codes.
	mapping = {
		"DESBORDAMIENTO DEL RIO PUEBLO VIEJO": 1,
		"PRECIPITACIONES FUERTES": 2,
		"OBSTRUCCION DEL DRENAJE PLUVIAL": 3,
		"DESBORDAMIENTO DEL CANAL CAÑEO Y AGUAS QUE VIENEN DE LA PARTE NORTE DE LA CIUDAD": 4,
	}

	# Pull the column out as a plain Python list, then as a NumPy array.
	col_str = test[:, "Causa"].to_list()[0]
	col_np = np.array(col_str)

	# Empty strings are replaced by the placeholder "0".
	# NOTE(review): the placeholder has length 1, so an empty cell ends up with
	# weight 1 rather than 0 -- confirm that this is intended.
	col_np = np.where(col_np != "", col_np, "0")

	# Weight of a row = number of characters of its text. `otypes` is stated
	# explicitly so np.vectorize does not have to probe the first element to infer
	# the output type (it refuses size-0 input when it has to infer).
	vfunc = np.vectorize(len, otypes=[int])
	col_weight = vfunc(col_np)

	# ---- "Vulnerable" column -> one integer weight per row ----------------------
	# This second filter is applied to the already-filtered frame, so it selects
	# every row; it is kept so `condition2` / `geo_shape2` stay defined.
	condition2 = geo_shape["MUNICIPIO"] == "SANTO DOMINGO ESTE"
	geo_shape2 = geo_shape.loc[condition2]

	test2 = dt.Frame(geo_shape['Vulnerable'])

	# Numeric code for the known vulnerability type (currently unused, see above).
	mapping2 = {"INUNDACION": 1}

	col_str2 = test2[:, "Vulnerable"].to_list()[0]
	col_np2 = np.array(col_str2)

	# Same placeholder substitution as for "Causa".
	col_np2 = np.where(col_np2 != "", col_np2, "0")
	print(col_np2)

	vfunc2 = np.vectorize(len, otypes=[int])
	col_weight2 = vfunc2(col_np2)

	# Aliases read by AlgorithKMean().
	ID = col_weight2
	causa = col_weight

	# 1-based rank of every distinct "Causa" value in sorted order (currently unused).
	LUCY_TRANSFORM = {
		cause: rank
		for rank, cause in enumerate(sorted(set(geo_shape['Causa'])), start=1)
	}

	def AlgorithKMean():
		"""Cluster the (causa, ID) weight pairs with k-means and plot the result.

		Reads the arrays ``causa`` and ``ID`` from the enclosing scope, stacks
		them into a two-column matrix, fits a 3-cluster ``KMeans`` model on it
		and shows a scatter plot of the samples coloured by cluster label, with
		the cluster centres drawn as red crosses.
		"""
		# One row per sample: column 0 comes from `causa`, column 1 from `ID`.
		X = np.column_stack((causa, ID))

		# Instantiate k-means with 3 clusters and fit it to the data.
		kmeans = KMeans(n_clusters=3)
		kmeans.fit(X)

		centers = kmeans.cluster_centers_
		labels = kmeans.labels_

		# Samples coloured by assigned cluster, then the centres on top.
		plt.scatter(X[:, 0], X[:, 1], c=labels)
		plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, linewidths=3, color='r')
		plt.show()

	def AlgorithmOPTICS():
		"""Run OPTICS on synthetic 2-D data and plot four diagnostic panels.

		The input is four Gaussian blobs of 1250 points each, drawn from the
		global NumPy random generator after seeding it with 0; the shapefile
		data is not used here. The figure has the reachability plot on the top
		row and, below it, the OPTICS labelling followed by the DBSCAN-style
		labellings extracted from the same fit at eps=0.5 and eps=2.
		"""
		# Generate sample data (draw order C1..C4 fixes the resulting points).
		np.random.seed(0)
		n_points_per_cluster = 1250

		C1 = [-5, -2] + 0.8 * np.random.randn(n_points_per_cluster, 2)
		C2 = [4, -1] + 0.1 * np.random.randn(n_points_per_cluster, 2)
		C3 = [1, -2] + 0.2 * np.random.randn(n_points_per_cluster, 2)
		C4 = [-2, 3] + 0.3 * np.random.randn(n_points_per_cluster, 2)
		X = np.vstack((C1, C2, C3, C4))

		clust = OPTICS(min_samples=45, xi=0.05, min_cluster_size=0.05)

		# Run the fit
		clust.fit(X)

		# DBSCAN-equivalent labels cut from the fitted reachability data.
		labels_050 = cluster_optics_dbscan(
			reachability=clust.reachability_,
			core_distances=clust.core_distances_,
			ordering=clust.ordering_,
			eps=0.5,
		)
		labels_200 = cluster_optics_dbscan(
			reachability=clust.reachability_,
			core_distances=clust.core_distances_,
			ordering=clust.ordering_,
			eps=2,
		)

		# Reachability values and labels rearranged into the OPTICS ordering.
		space = np.arange(len(X))
		reachability = clust.reachability_[clust.ordering_]
		labels = clust.labels_[clust.ordering_]

		plt.figure(figsize=(10, 7))
		G = gridspec.GridSpec(2, 3)
		ax1 = plt.subplot(G[0, :])
		ax2 = plt.subplot(G[1, 0])
		ax3 = plt.subplot(G[1, 1])
		ax4 = plt.subplot(G[1, 2])

		# Reachability plot: one marker style per cluster label, noise in black,
		# plus horizontal guides at the two eps cuts (2.0 and 0.5).
		colors = ["g.", "r.", "b.", "y.", "c."]
		for klass, color in zip(range(0, 5), colors):
			Xk = space[labels == klass]
			Rk = reachability[labels == klass]
			ax1.plot(Xk, Rk, color, alpha=0.3)
		ax1.plot(space[labels == -1], reachability[labels == -1], "k.", alpha=0.3)
		ax1.plot(space, np.full_like(space, 2.0, dtype=float), "k-", alpha=0.5)
		ax1.plot(space, np.full_like(space, 0.5, dtype=float), "k-.", alpha=0.5)
		ax1.set_ylabel("Reachability (epsilon distance)")
		ax1.set_title("Reachability Plot")

		# OPTICS
		colors = ["g.", "r.", "b.", "y.", "c."]
		for klass, color in zip(range(0, 5), colors):
			Xk = X[clust.labels_ == klass]
			ax2.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
		ax2.plot(X[clust.labels_ == -1, 0], X[clust.labels_ == -1, 1], "k+", alpha=0.1)
		ax2.set_title("Automatic Clustering\nOPTICS")

		# DBSCAN at 0.5
		colors = ["g.", "r.", "b.", "c."]
		for klass, color in zip(range(0, 4), colors):
			Xk = X[labels_050 == klass]
			ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
		ax3.plot(X[labels_050 == -1, 0], X[labels_050 == -1, 1], "k+", alpha=0.1)
		ax3.set_title("Clustering at 0.5 epsilon cut\nDBSCAN")

		# DBSCAN at 2.
		colors = ["g.", "m.", "y.", "c."]
		for klass, color in zip(range(0, 4), colors):
			Xk = X[labels_200 == klass]
			ax4.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
		ax4.plot(X[labels_200 == -1, 0], X[labels_200 == -1, 1], "k+", alpha=0.1)
		ax4.set_title("Clustering at 2.0 epsilon cut\nDBSCAN")

		plt.tight_layout()
		plt.show()


	def AlgorithmLinearRegresion():
		"""Fit a straight line to (col_weight, col_weight2) and plot it.

		Reads the arrays ``col_weight`` (predictor, labelled 'Causa' on the
		plot) and ``col_weight2`` (target, labelled 'Vulnerable') from the
		enclosing scope, fits an ordinary ``LinearRegression`` and shows the
		samples as a scatter plot with the fitted values drawn over them.
		"""
		x = col_weight
		y = col_weight2

		# The estimator takes a 2-D feature matrix, hence the single-column reshape.
		X = x.reshape(-1, 1)
		model = LinearRegression()
		model.fit(X, y)

		# The former one-off prediction for x=6 was never used and has been dropped.
		plt.scatter(x, y)
		plt.plot(x, model.predict(X))
		plt.xlabel('Causa')
		plt.ylabel('Vulnerable')
		plt.title('Regresion Linear utilizando(Causa,Vulnerable)')
		plt.show()



	# Run the three analyses in sequence; each one ends with its own plt.show().
	AlgorithmOPTICS()
	AlgorithKMean()
	AlgorithmLinearRegresion()

	# NOTE(review): the value returned by explore() is discarded here; presumably
	# it is only rendered when it is the last expression of a notebook cell.
	mapa.explore()

	# plot() only draws onto a new Matplotlib figure; show() is required for that
	# figure to appear when this file is run as a plain script.
	mapa.plot()
	plt.show()