Skip to content

Instantly share code, notes, and snippets.

@andrea-dagostino
Last active November 22, 2021 15:28
Show Gist options
  • Save andrea-dagostino/f40eea6a731a1ad1376fc70c5dcf3aab to your computer and use it in GitHub Desktop.
Save andrea-dagostino/f40eea6a731a1ad1376fc70c5dcf3aab to your computer and use it in GitHub Desktop.
posts/raggruppamento-testuale-con-tf-idf
# importiamo le librerie necessarie da sklearn
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# importiamo le altre librerie necessarie
import pandas as pd
import numpy as np
# librerie per la manipolazione del testo
import re
import string
import nltk
from nltk.corpus import stopwords
# importiamo le librerie di visualizzazione
import matplotlib.pyplot as plt
# Newsgroups to load: two computing, two sports and two religion-related
# groups, listed by their 20-newsgroups category names.
categories = [
    'comp.graphics',
    'comp.os.ms-windows.misc',
    'rec.sport.baseball',
    'rec.sport.hockey',
    'alt.atheism',
    'soc.religion.christian',
]

# Load the shuffled training split restricted to the categories above,
# asking scikit-learn to strip headers, footers and quoted replies from
# each post via the `remove` argument.
dataset = fetch_20newsgroups(
    subset='train',
    categories=categories,
    remove=('headers', 'footers', 'quotes'),
    shuffle=True,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment