Skip to content

Instantly share code, notes, and snippets.

View pateelhs's full-sized avatar

Pateel H S pateelhs

  • Agiledge process solutions Pvt Ltd
View GitHub Profile
@pateelhs
pateelhs / clustering.py
Last active February 14, 2024 08:56
DBSCAN NEW CLUSTERING
# clustering.py
from sklearn.cluster import DBSCAN
def cluster_tickets_advanced(tfidf_matrix, min_samples=5, eps=0.5):
    """Cluster ticket vectors with DBSCAN using cosine distance.

    Unlike a fixed-k approach, the number of clusters is not given up
    front; it follows from ``eps`` and ``min_samples``.

    Args:
        tfidf_matrix: Feature matrix with one row per ticket, in a form
            accepted by ``DBSCAN.fit_predict`` (presumably the output of
            a ``TfidfVectorizer`` -- confirm against the caller).
        min_samples: Passed straight to ``DBSCAN(min_samples=...)``.
        eps: Passed straight to ``DBSCAN(eps=...)``; because the metric
            is ``'cosine'`` this is a cosine-distance threshold.

    Returns:
        The label array produced by ``DBSCAN.fit_predict``, one entry
        per row of ``tfidf_matrix``. Per the scikit-learn documentation,
        samples that DBSCAN treats as noise are labelled ``-1``.
    """
    # Body indentation restored: the original lines were flush-left,
    # which made the function a syntax error.
    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine')
    clusters = dbscan.fit_predict(tfidf_matrix)
    return clusters
@pateelhs
pateelhs / clustering.py
Last active February 13, 2024 06:04
data_processing.py
import numpy as np
from sklearn.cluster import KMeans
def cluster_tickets(tfidf_matrix, num_clusters=10):
    """Partition ticket vectors into a fixed number of K-Means clusters.

    Args:
        tfidf_matrix: Feature matrix with one row per ticket, in a form
            accepted by ``KMeans.fit_predict`` (presumably the output of
            a ``TfidfVectorizer`` -- confirm against the caller).
        num_clusters: Passed to ``KMeans(n_clusters=...)``.

    Returns:
        The label array produced by ``KMeans.fit_predict``, one cluster
        index per row of ``tfidf_matrix``.
    """
    # Body indentation restored: the original lines were flush-left,
    # which made the function a syntax error.
    # random_state is pinned so repeated runs on the same data use the
    # same seed for centroid initialisation.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    clusters = kmeans.fit_predict(tfidf_matrix)
    return clusters
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Module 1: Data Processing
def preprocess_text(text):
stop_words = set(stopwords.words('english'))