# Fetch the gzip-compressed NYC taxi vectors dataset and unpack it in place
wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/nyc-taxi-vectors.csv.gz
gunzip nyc-taxi-vectors.csv.gz
# Install the Python packages imported by the clustering script
pip install scikit-learn pandas
import pandas as pd
from sklearn.cluster import KMeans
from ast import literal_eval
import time
# Benchmark: time the whole pipeline (CSV load, vector parsing, clustering).
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
# Load the CSV file into a DataFrame
df = pd.read_csv('nyc-taxi-vectors.csv')
# The 'vector' column stores each vector as a string; parse it into an actual list
df['vector'] = df['vector'].apply(literal_eval)
# Collect the per-row vectors into a list of lists for fitting the model
vectors = list(df['vector'])
# Cluster into 5 groups; random_state fixes the seed and n_init=1 runs a
# single centroid initialisation
kmeans = KMeans(n_clusters=5, random_state=42, n_init=1)
# Fit the model and store the assigned cluster index for every row
df['cluster'] = kmeans.fit_predict(vectors)
execution_time = time.perf_counter() - start_time
print(f'Execution time in seconds: {execution_time}')