Andrea D'Agostino andrea-dagostino

## fuzzy_logic_tagging_ita4.py
def fuzzy_tagging(tags, articles):
	"""
	Tag text documents by fuzzy-matching each predefined tag against them.

	Args:
		tags: list of predefined tags to apply.
		articles: list of text contents to be tagged.

	Returns:
		According to the original description, a pandas DataFrame with the
		tagged articles. NOTE(review): the visible part of the body stops
		before any result is assembled or returned - confirm against the
		full function.
	"""
	results = []
	# loop over the tags
	for i, tag in enumerate(tags):
	    d = {}
	    # fuzzy-match this tag against the articles, keeping at most 4 candidates
	    ranking = process.extract(tag, articles, limit=4)

## fuzzy_logic_tagging_ita3.py
# load the dataset and keep only the rows whose URL contains 'post'
df = pd.read_csv('dataset.csv')
# na=False: a row with a missing URL counts as "not a post"; without it the
# boolean mask would contain NaN, which pandas refuses to index with.
# reset_index(drop=True) returns a new frame with a clean 0..n-1 index instead
# of modifying the filtered selection in place.
posts = df[df.url.str.contains('post', na=False)].reset_index(drop=True)
# plain Python list with the text of every post
articles = posts.article.tolist()

## fuzzy_logic_tagging_ita2.py
# these are the tags we want to apply to our documents.
# change this list as you see fit
tags = [
    "machine learning",
    "clustering",
    "carriera",
    "progetto",
    "consigli",
    "analytics",
    "deep learning",

## fuzzy_logic_tagging_ita1.py
from thefuzz import process
import pandas as pd

## text_sim_tfidf_eng4.py
# keep only the cells whose similarity exceeds 0.4 (all other cells become NaN);
# adjust the threshold to show more or fewer pairs
top = similarity_df.where(similarity_df > 0.4)
# same-shaped matrix with ones on and above the main diagonal, zeros below it
mask = np.triu(np.ones_like(top), k=0)

# let's create the viz
plt.figure(figsize=(12, 12))
sns.heatmap(
			top,
			square=True,
			annot=True,
			robust=True,

## text_sim_tfidf_eng2.py
# split every URL on '/', drop the first three segments and take the second of
# the remaining ones; the result is used as the article label
url_segments = posts.url.str.split('/')
labels = url_segments.str[3:].str[1]
# wrap the similarity matrix in a dataframe labelled on both axes
similarity_df = pd.DataFrame(data=M, index=labels, columns=labels)
# ones on and above the diagonal: mask used to remove the top of the heatmap
mask = np.triu(np.ones_like(similarity_df), k=0)

# let's create the viz
plt.figure(figsize=(12, 12))
sns.heatmap(
			similarity_df,
			square=True,
			annot=True,

## text_sim_tfidf_eng1.py
# Pairwise similarity matrix: M[i, j] holds the similarity between the i-th and
# the j-th article of `posts`, so it is n x n with n = number of posts.
_article_texts = posts.article.tolist()
M = np.zeros((len(_article_texts), len(_article_texts)))

# Positions come from enumerate() rather than from the dataframe index labels,
# so the matrix is filled correctly even if `posts` does not carry a fresh
# 0..n-1 index (e.g. after filtering without reset_index).
for i, article_i in enumerate(tqdm(_article_texts, desc='1st level')):
	for j, article_j in enumerate(_article_texts):
		M[i, j] = compute_similarity(article_i, article_j)

## text_sim_tfidf_ita9.py
import pandas as pd
import numpy as np

import nltk
from nltk.corpus import stopwords
import string

from sklearn.feature_extraction.text import TfidfVectorizer

from tqdm import tqdm

## text_sim_tfidf_ita8.py
# keep only the cells whose similarity exceeds 0.4 (all other cells become NaN);
# this threshold is the value to tweak
top = similarity_df.where(similarity_df > 0.4)
# same-shaped matrix with ones on and above the main diagonal, zeros below it
mask = np.triu(np.ones_like(top), k=0)


sns.heatmap(
		top,
		square=True,
		annot=True,
		robust=True,
		fmt='.2f',

## text_sim_tfidf_ita7.py
# split every URL on '/', drop the first three segments and take the second of
# the remaining ones; the result is used as the article label
url_segments = posts.url.str.split('/')
labels = url_segments.str[3:].str[1]
# wrap the similarity matrix in a dataframe labelled on both axes
similarity_df = pd.DataFrame(data=M, index=labels, columns=labels)
# ones on and above the diagonal: mask used to remove the upper part of the heatmap
mask = np.triu(np.ones_like(similarity_df), k=0)

# creiamo la visualizzazione
plt.figure(figsize=(12, 12))
sns.heatmap(
			similarity_df,
			square=True,
			annot=True,
	def fuzzy_tagging(tags, articles):
	"""
	Questa funzione riceve in input una lista di tag predefiniti e la lista di contenuto testuale da taggare.
	Restituisce un dataframe Pandas con gli articoli taggati
	"""
	results = []
	# ciclo nei tag
	for i, tag in enumerate(tags):
	d = {}
	ranking = process.extract(tag, articles, limit=4)
	# carichiamo un dataset e isoliamo i post
	df = pd.read_csv('dataset.csv')
	posts = df[df.url.str.contains('post')]
	posts.reset_index(inplace=True, drop=True)
	articles = list(posts.article)
	# queste sono i tag che vogliamo applicare ai nostri documenti.
	# cambiate questa lista a vostra discrezione
	tags = [
	"machine learning",
	"clustering",
	"carriera",
	"progetto",
	"consigli",
	"analytics",
	"deep learning",
	top = similarity_df[similarity_df > 0.4] # change this
	mask = np.triu(np.ones_like(top))

	# let's create the viz
	plt.figure(figsize=(12, 12))
	sns.heatmap(
	top,
	square=True,
	annot=True,
	robust=True,
	labels = posts.url.str.split('/').str[3:].str[1] # we extract the titles of the articles from the url
	similarity_df = pd.DataFrame(M, columns=labels, index=labels) # let's create the dataframe
	mask = np.triu(np.ones_like(similarity_df)) # we apply a mask to remove the top of the heatmap

	# let's create the viz
	plt.figure(figsize=(12, 12))
	sns.heatmap(
	similarity_df,
	square=True,
	annot=True,
	M = np.zeros((posts.shape[0], posts.shape[0])) # we create a 30x30 matrix to contain the results of article_i with article_j


	for i, row in tqdm(posts.iterrows(), total=posts.shape[0], desc='1st level'): # we define i
	for j, next_row in posts.iterrows(): # we define j
	M[i, j] = compute_similarity(row.article, next_row.article) # we populate the matrix with the results
	import pandas as pd
	import numpy as np

	import nltk
	from nltk.corpus import stopwords
	import string

	from sklearn.feature_extraction.text import TfidfVectorizer

	from tqdm import tqdm