# aneury1/clustering.py

## clustering.py
import datatable as dt
import fiona
import folium
import geopandas
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plotter
import matplotlib.pyplot as plt
import numpy
import numpy as np
import pandas as pd
import pyproj
from shapely.geometry import Point, Polygon
from sklearn import linear_model
from sklearn.cluster import KMeans
from sklearn.cluster import OPTICS, cluster_optics_dbscan
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


def AlgorithKMean():
    """Cluster the "Enero"/"Febrero" values with k-means and plot the result.

    Reads the ``"Enero"`` and ``"Febrero"`` columns of the module-level
    ``precipitaciones`` object (not defined in this part of the file), pairs
    them into 2-D points, fits a 3-cluster ``KMeans`` model and shows a
    scatter plot coloured by cluster label with the centres marked as red
    crosses.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # One row per observation: (Enero, Febrero).
    points = np.column_stack(
        (np.array(precipitaciones["Enero"]), np.array(precipitaciones["Febrero"]))
    )

    # fit() returns the estimator itself, so the call can be chained.
    model = KMeans(n_clusters=3).fit(points)
    centroids = model.cluster_centers_

    # Observations coloured by assigned cluster, then the centres on top.
    plt.scatter(points[:, 0], points[:, 1], c=model.labels_)
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='r')
    plt.show()

def AlgorithmOPTICS():
    """Run OPTICS on points derived from ``precipitaciones`` and plot the result.

    Four groups of 2-D points are built from the ``"Enero"`` and ``"Febrero"``
    columns of the module-level ``precipitaciones`` object (not defined in this
    part of the file). Each group is scaled, shifted by a fixed 2-D offset and
    stacked into one sample matrix. OPTICS is fitted on that matrix,
    DBSCAN-style labels are extracted at epsilon 0.5 and 2.0, and a four-panel
    figure is shown: the reachability plot on top, and the OPTICS,
    DBSCAN(0.5) and DBSCAN(2.0) labelings underneath.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Nothing below draws random numbers; the seed is kept because it resets
    # NumPy's global RNG, which code running after this function may rely on.
    np.random.seed(0)

    # (column name, 2-D offset, scale) for each group of points.
    # NOTE(review): three of the four groups read "Febrero" - this looks like a
    # copy-paste slip; confirm which columns were intended.
    groups = (
        ("Enero", [-5, -2], 0.8),
        ("Febrero", [4, -1], 0.1),
        ("Febrero", [1, -2], 0.2),
        ("Febrero", [-2, 3], 0.3),
    )

    # Assumes each column is a 1-D sequence with one value per row. Adding such
    # a column directly to a 2-element offset only broadcasts when it has one
    # or two entries, so it is reshaped to (n, 1): every value v becomes the
    # 2-D point (offset_x + scale * v, offset_y + scale * v).
    # NOTE(review): confirm this embedding is the one intended.
    X = np.vstack([
        offset + scale * np.array(precipitaciones[name]).reshape(-1, 1)
        for name, offset, scale in groups
    ])

    clust = OPTICS(min_samples=45, xi=0.05, min_cluster_size=0.05)

    # Run the fit
    clust.fit(X)

    # The same two epsilon cuts are used for label extraction and for the
    # horizontal guide lines in the reachability plot.
    eps_low, eps_high = 0.5, 2.0
    labels_050 = cluster_optics_dbscan(
        reachability=clust.reachability_,
        core_distances=clust.core_distances_,
        ordering=clust.ordering_,
        eps=eps_low,
    )
    labels_200 = cluster_optics_dbscan(
        reachability=clust.reachability_,
        core_distances=clust.core_distances_,
        ordering=clust.ordering_,
        eps=eps_high,
    )

    # Reachability and labels rearranged into OPTICS' processing order.
    space = np.arange(len(X))
    reachability = clust.reachability_[clust.ordering_]
    labels = clust.labels_[clust.ordering_]

    plt.figure(figsize=(10, 7))
    G = gridspec.GridSpec(2, 3)
    ax1 = plt.subplot(G[0, :])
    ax2 = plt.subplot(G[1, 0])
    ax3 = plt.subplot(G[1, 1])
    ax4 = plt.subplot(G[1, 2])

    # Reachability plot: one colour per cluster id 0..4, label -1 in black.
    for klass, color in enumerate(["g.", "r.", "b.", "y.", "c."]):
        in_cluster = labels == klass
        ax1.plot(space[in_cluster], reachability[in_cluster], color, alpha=0.3)
    ax1.plot(space[labels == -1], reachability[labels == -1], "k.", alpha=0.3)
    ax1.plot(space, np.full_like(space, eps_high, dtype=float), "k-", alpha=0.5)
    ax1.plot(space, np.full_like(space, eps_low, dtype=float), "k-.", alpha=0.5)
    ax1.set_ylabel("Reachability (epsilon distance)")
    ax1.set_title("Reachability Plot")

    # The three scatter panels differ only in axes, labels, palette and title.
    panels = (
        (ax2, clust.labels_, ["g.", "r.", "b.", "y.", "c."],
         "Automatic Clustering\nOPTICS"),
        (ax3, labels_050, ["g.", "r.", "b.", "c."],
         "Clustering at 0.5 epsilon cut\nDBSCAN"),
        (ax4, labels_200, ["g.", "m.", "y.", "c."],
         "Clustering at 2.0 epsilon cut\nDBSCAN"),
    )
    for ax, point_labels, palette, title in panels:
        for klass, color in enumerate(palette):
            members = X[point_labels == klass]
            ax.plot(members[:, 0], members[:, 1], color, alpha=0.3)
        # Points labelled -1 are drawn as faint black crosses.
        unassigned = X[point_labels == -1]
        ax.plot(unassigned[:, 0], unassigned[:, 1], "k+", alpha=0.1)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()


def AlgorithmLinearRegresion():
    """Fit and plot a linear regression of "Febrero" on "Enero".

    Reads the ``"Enero"`` and ``"Febrero"`` columns of the module-level
    ``precipitaciones`` object (not defined in this part of the file), fits
    ``LinearRegression`` with "Enero" as the single feature, and shows a
    scatter of the data with the fitted line drawn over it.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    x = np.array(precipitaciones["Enero"])
    y = np.array(precipitaciones["Febrero"])

    # The estimator needs a 2-D feature matrix: one column, one row per sample.
    X = x.reshape(-1, 1)
    model = LinearRegression()
    model.fit(X, y)

    plt.scatter(x, y)
    plt.plot(x, model.predict(X))
    # NOTE(review): labels and title say Causa/Vulnerable while the data comes
    # from the "Enero"/"Febrero" columns - confirm the wording.
    plt.xlabel('Causa')
    plt.ylabel('Vulnerable')
    plt.title('Regresion Linear utilizando(Causa,Vulnerable)')
    plt.show()


# Run the three precipitation demos, in this order, whenever the module is
# executed or imported (the calls sit at module top level).
AlgorithmOPTICS()
AlgorithKMean()
AlgorithmLinearRegresion()


def RegresionLineal():
    """Fit and plot a linear regression of deaths on heart-disease cases.

    Reads the ``'Enfermedades_cardiacas'`` and ``'Defunciones'`` columns of the
    module-level ``Enfermedades_Cardiacas_Baoruco_2019`` object (not defined in
    this part of the file), fits ``LinearRegression`` with the first column as
    the single feature, and shows a scatter of the data with the fitted line.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Convert to NumPy arrays first, as the sibling functions do, so that
    # ``reshape`` is available whatever container the columns come in.
    x = np.asarray(Enfermedades_Cardiacas_Baoruco_2019['Enfermedades_cardiacas'])
    y = np.asarray(Enfermedades_Cardiacas_Baoruco_2019['Defunciones'])

    # The estimator needs a 2-D feature matrix: one column, one row per sample.
    X = x.reshape(-1, 1)
    model = LinearRegression()
    model.fit(X, y)

    plt.scatter(x, y)
    plt.plot(x, model.predict(X))
    # Axis labels name the columns actually plotted, matching the title.
    plt.xlabel('Enfermedades_cardiacas')
    plt.ylabel('Defunciones')
    plt.title('Regresion Linear utilizando(Enfermedades_cardiacas, Defunciones)')
    plt.show()

# Module-level call: the regression demo runs as soon as this line is reached.
RegresionLineal()


def RegresionLineal2():
    """Fit a linear regression on a train split of the CSV data and plot it.

    Loads ``Enfermedades_Cardiacas_Baoruco_2019.csv`` (``;``-delimited) from
    the current working directory, uses the column at position 1 as the single
    feature and the column at position 2 as the target, holds out 33% of the
    rows, fits ``LinearRegression`` on the remainder and shows the training
    points with the fitted line in black.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv("Enfermedades_Cardiacas_Baoruco_2019.csv", delimiter=";")

    # The estimator needs a 2-D feature matrix: one column, one row per sample.
    x = df.iloc[:, 1].values.reshape(-1, 1)
    y = df.iloc[:, 2].values

    # Only the training part is used; the held-out part is never evaluated.
    x_train, _x_test, y_train, _y_test = train_test_split(x, y, test_size=0.33)
    regressor = LinearRegression()
    regressor.fit(x_train, y_train)

    plt.scatter(x_train, y_train)
    plt.plot(x_train, regressor.predict(x_train), color='black')
    plt.title("Modelo de regresion lineal")
    plt.xlabel("Enfermedades_Cardiacas")
    plt.ylabel("Defunciones")
    plt.show()


# Module-level call: the CSV-based regression demo runs as soon as this line is reached.
RegresionLineal2()


def AlgorithKMean3():
    """Cluster two columns of the heart-disease CSV with k-means and plot them.

    Loads ``Enfermedades_Cardiacas_Baoruco_2019.csv`` (``;``-delimited) from
    the current working directory, pairs the columns at positions 1 and 2 into
    2-D points, fits a 3-cluster ``KMeans`` model and shows a scatter plot
    coloured by cluster label with the centres marked as red crosses.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv("Enfermedades_Cardiacas_Baoruco_2019.csv", delimiter=";")
    x = df.iloc[:, 1].values
    # The right-hand side was commented out, leaving a bare ``y =``.
    y = df.iloc[:, 2].values

    # One row per observation: (column 1, column 2).
    X = np.column_stack((x, y))

    # Instantiate the k-means algorithm with 3 clusters and fit it to the data
    kmeans = KMeans(n_clusters=3)
    kmeans.fit(X)

    # Get the cluster centers and labels
    centers = kmeans.cluster_centers_
    labels = kmeans.labels_

    # Plot the data and the cluster centers
    plt.scatter(X[:, 0], X[:, 1], c=labels)
    plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, linewidths=3, color='r')
    plt.show()
# Module-level call: the CSV-based k-means demo runs as soon as this line is reached.
AlgorithKMean3()
	import fiona
	import numpy
	import matplotlib.pyplot as plotter
	import folium
	import geopandas
	import pyproj
	from sklearn import linear_model
	from shapely.geometry import Point, Polygon
	from sklearn.cluster import KMeans
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.cluster import OPTICS, cluster_optics_dbscan
	import matplotlib.gridspec as gridspec
	import matplotlib.pyplot as plt
	import numpy as np
	import datatable as dt
	from sklearn.linear_model import LinearRegression


	def AlgorithKMean():
	    """Cluster the "Enero"/"Febrero" values with k-means and plot the result.

	    Reads the ``"Enero"`` and ``"Febrero"`` columns of the module-level
	    ``precipitaciones`` object (not defined in this part of the file), pairs
	    them into 2-D points, fits a 3-cluster ``KMeans`` model and shows a
	    scatter plot coloured by cluster label with the centres marked as red
	    crosses.

	    Returns:
	        None. The figure is displayed with ``plt.show()``.
	    """
	    # One row per observation: (Enero, Febrero).
	    points = np.column_stack(
	        (np.array(precipitaciones["Enero"]), np.array(precipitaciones["Febrero"]))
	    )

	    # fit() returns the estimator itself, so the call can be chained.
	    model = KMeans(n_clusters=3).fit(points)
	    centroids = model.cluster_centers_

	    # Observations coloured by assigned cluster, then the centres on top.
	    plt.scatter(points[:, 0], points[:, 1], c=model.labels_)
	    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='r')
	    plt.show()

	def AlgorithmOPTICS():
	    """Run OPTICS on points derived from ``precipitaciones`` and plot the result.

	    Four groups of 2-D points are built from the ``"Enero"`` and ``"Febrero"``
	    columns of the module-level ``precipitaciones`` object (not defined in this
	    part of the file). Each group is scaled, shifted by a fixed 2-D offset and
	    stacked into one sample matrix. OPTICS is fitted on that matrix,
	    DBSCAN-style labels are extracted at epsilon 0.5 and 2.0, and a four-panel
	    figure is shown: the reachability plot on top, and the OPTICS,
	    DBSCAN(0.5) and DBSCAN(2.0) labelings underneath.

	    Returns:
	        None. The figure is displayed with ``plt.show()``.
	    """
	    # Nothing below draws random numbers; the seed is kept because it resets
	    # NumPy's global RNG, which code running after this function may rely on.
	    np.random.seed(0)

	    # (column name, 2-D offset, scale) for each group of points.
	    # NOTE(review): three of the four groups read "Febrero" - this looks like a
	    # copy-paste slip; confirm which columns were intended.
	    groups = (
	        ("Enero", [-5, -2], 0.8),
	        ("Febrero", [4, -1], 0.1),
	        ("Febrero", [1, -2], 0.2),
	        ("Febrero", [-2, 3], 0.3),
	    )

	    # Assumes each column is a 1-D sequence with one value per row. Adding such
	    # a column directly to a 2-element offset only broadcasts when it has one
	    # or two entries, so it is reshaped to (n, 1): every value v becomes the
	    # 2-D point (offset_x + scale * v, offset_y + scale * v).
	    # NOTE(review): confirm this embedding is the one intended.
	    X = np.vstack([
	        offset + scale * np.array(precipitaciones[name]).reshape(-1, 1)
	        for name, offset, scale in groups
	    ])

	    clust = OPTICS(min_samples=45, xi=0.05, min_cluster_size=0.05)

	    # Run the fit
	    clust.fit(X)

	    # The same two epsilon cuts are used for label extraction and for the
	    # horizontal guide lines in the reachability plot.
	    eps_low, eps_high = 0.5, 2.0
	    labels_050 = cluster_optics_dbscan(
	        reachability=clust.reachability_,
	        core_distances=clust.core_distances_,
	        ordering=clust.ordering_,
	        eps=eps_low,
	    )
	    labels_200 = cluster_optics_dbscan(
	        reachability=clust.reachability_,
	        core_distances=clust.core_distances_,
	        ordering=clust.ordering_,
	        eps=eps_high,
	    )

	    # Reachability and labels rearranged into OPTICS' processing order.
	    space = np.arange(len(X))
	    reachability = clust.reachability_[clust.ordering_]
	    labels = clust.labels_[clust.ordering_]

	    plt.figure(figsize=(10, 7))
	    G = gridspec.GridSpec(2, 3)
	    ax1 = plt.subplot(G[0, :])
	    ax2 = plt.subplot(G[1, 0])
	    ax3 = plt.subplot(G[1, 1])
	    ax4 = plt.subplot(G[1, 2])

	    # Reachability plot: one colour per cluster id 0..4, label -1 in black.
	    for klass, color in enumerate(["g.", "r.", "b.", "y.", "c."]):
	        in_cluster = labels == klass
	        ax1.plot(space[in_cluster], reachability[in_cluster], color, alpha=0.3)
	    ax1.plot(space[labels == -1], reachability[labels == -1], "k.", alpha=0.3)
	    ax1.plot(space, np.full_like(space, eps_high, dtype=float), "k-", alpha=0.5)
	    ax1.plot(space, np.full_like(space, eps_low, dtype=float), "k-.", alpha=0.5)
	    ax1.set_ylabel("Reachability (epsilon distance)")
	    ax1.set_title("Reachability Plot")

	    # The three scatter panels differ only in axes, labels, palette and title.
	    panels = (
	        (ax2, clust.labels_, ["g.", "r.", "b.", "y.", "c."],
	         "Automatic Clustering\nOPTICS"),
	        (ax3, labels_050, ["g.", "r.", "b.", "c."],
	         "Clustering at 0.5 epsilon cut\nDBSCAN"),
	        (ax4, labels_200, ["g.", "m.", "y.", "c."],
	         "Clustering at 2.0 epsilon cut\nDBSCAN"),
	    )
	    for ax, point_labels, palette, title in panels:
	        for klass, color in enumerate(palette):
	            members = X[point_labels == klass]
	            ax.plot(members[:, 0], members[:, 1], color, alpha=0.3)
	        # Points labelled -1 are drawn as faint black crosses.
	        unassigned = X[point_labels == -1]
	        ax.plot(unassigned[:, 0], unassigned[:, 1], "k+", alpha=0.1)
	        ax.set_title(title)

	    plt.tight_layout()
	    plt.show()


	def AlgorithmLinearRegresion():
	    """Fit and plot a linear regression of "Febrero" on "Enero".

	    Reads the ``"Enero"`` and ``"Febrero"`` columns of the module-level
	    ``precipitaciones`` object (not defined in this part of the file), fits
	    ``LinearRegression`` with "Enero" as the single feature, and shows a
	    scatter of the data with the fitted line drawn over it.

	    Returns:
	        None. The figure is displayed with ``plt.show()``.
	    """
	    x = np.array(precipitaciones["Enero"])
	    y = np.array(precipitaciones["Febrero"])

	    # The estimator needs a 2-D feature matrix: one column, one row per sample.
	    X = x.reshape(-1, 1)
	    model = LinearRegression()
	    model.fit(X, y)

	    plt.scatter(x, y)
	    plt.plot(x, model.predict(X))
	    # NOTE(review): labels and title say Causa/Vulnerable while the data comes
	    # from the "Enero"/"Febrero" columns - confirm the wording.
	    plt.xlabel('Causa')
	    plt.ylabel('Vulnerable')
	    plt.title('Regresion Linear utilizando(Causa,Vulnerable)')
	    plt.show()



	# Calls the three precipitation demos one after another, in this order.
	AlgorithmOPTICS()
	AlgorithKMean()
	AlgorithmLinearRegresion()




	def RegresionLineal():
	    """Fit and plot a linear regression of deaths on heart-disease cases.

	    Reads the ``'Enfermedades_cardiacas'`` and ``'Defunciones'`` columns of the
	    module-level ``Enfermedades_Cardiacas_Baoruco_2019`` object (not defined in
	    this part of the file), fits ``LinearRegression`` with the first column as
	    the single feature, and shows a scatter of the data with the fitted line.

	    Returns:
	        None. The figure is displayed with ``plt.show()``.
	    """
	    # Convert to NumPy arrays first, as the sibling functions do, so that
	    # ``reshape`` is available whatever container the columns come in.
	    x = np.asarray(Enfermedades_Cardiacas_Baoruco_2019['Enfermedades_cardiacas'])
	    y = np.asarray(Enfermedades_Cardiacas_Baoruco_2019['Defunciones'])

	    # The estimator needs a 2-D feature matrix: one column, one row per sample.
	    X = x.reshape(-1, 1)
	    model = LinearRegression()
	    model.fit(X, y)

	    plt.scatter(x, y)
	    plt.plot(x, model.predict(X))
	    # Axis labels name the columns actually plotted, matching the title.
	    plt.xlabel('Enfermedades_cardiacas')
	    plt.ylabel('Defunciones')
	    plt.title('Regresion Linear utilizando(Enfermedades_cardiacas, Defunciones)')
	    plt.show()

	# Calls the regression demo defined just above.
	RegresionLineal()





	def RegresionLineal2():
	    """Fit a linear regression on a train split of the CSV data and plot it.

	    Loads ``Enfermedades_Cardiacas_Baoruco_2019.csv`` (``;``-delimited) from
	    the current working directory, uses the column at position 1 as the single
	    feature and the column at position 2 as the target, holds out 33% of the
	    rows, fits ``LinearRegression`` on the remainder and shows the training
	    points with the fitted line in black.

	    Returns:
	        None. The figure is displayed with ``plt.show()``.
	    """
	    df = pd.read_csv("Enfermedades_Cardiacas_Baoruco_2019.csv", delimiter=";")

	    # The estimator needs a 2-D feature matrix: one column, one row per sample.
	    x = df.iloc[:, 1].values.reshape(-1, 1)
	    y = df.iloc[:, 2].values

	    # Only the training part is used; the held-out part is never evaluated.
	    x_train, _x_test, y_train, _y_test = train_test_split(x, y, test_size=0.33)
	    regressor = LinearRegression()
	    regressor.fit(x_train, y_train)

	    plt.scatter(x_train, y_train)
	    plt.plot(x_train, regressor.predict(x_train), color='black')
	    plt.title("Modelo de regresion lineal")
	    plt.xlabel("Enfermedades_Cardiacas")
	    plt.ylabel("Defunciones")
	    plt.show()


	# Calls the CSV-based regression demo defined just above.
	RegresionLineal2()



	def AlgorithKMean3():
	    """Cluster two columns of the heart-disease CSV with k-means and plot them.

	    Loads ``Enfermedades_Cardiacas_Baoruco_2019.csv`` (``;``-delimited) from
	    the current working directory, pairs the columns at positions 1 and 2 into
	    2-D points, fits a 3-cluster ``KMeans`` model and shows a scatter plot
	    coloured by cluster label with the centres marked as red crosses.

	    Returns:
	        None. The figure is displayed with ``plt.show()``.
	    """
	    df = pd.read_csv("Enfermedades_Cardiacas_Baoruco_2019.csv", delimiter=";")
	    x = df.iloc[:, 1].values
	    # The right-hand side was commented out, leaving a bare ``y =``.
	    y = df.iloc[:, 2].values

	    # One row per observation: (column 1, column 2).
	    X = np.column_stack((x, y))

	    # Instantiate the k-means algorithm with 3 clusters and fit it to the data
	    kmeans = KMeans(n_clusters=3)
	    kmeans.fit(X)

	    # Get the cluster centers and labels
	    centers = kmeans.cluster_centers_
	    labels = kmeans.labels_

	    # Plot the data and the cluster centers
	    plt.scatter(X[:, 0], X[:, 1], c=labels)
	    plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, linewidths=3, color='r')
	    plt.show()
	# Calls the CSV-based k-means demo defined just above.
	AlgorithKMean3()