NLP useful code snippets

1. NLTK

Download the necessary data for NLTK

import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')            # used by WordNetLemmatizer below
nltk.download('maxent_ne_chunker')  # used by ne_chunk below
nltk.download('words')              # used by ne_chunk below

Tokenize a sentence

from nltk.tokenize import word_tokenize

sentence = "Hola, ¿cómo estás hoy?"
tokens = word_tokenize(sentence)
print(tokens)
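
Closely related: splitting raw text into sentences before tokenizing words. A minimal sketch using sent_tokenize from the same package (the example text is illustrative):

from nltk.tokenize import sent_tokenize

text = "Hola. ¿Cómo estás hoy? Espero que bien."
sentences = sent_tokenize(text, language='spanish')
print(sentences)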

Remove stopwords

from nltk.corpus import stopwords

stop_words = set(stopwords.words('spanish'))
# NLTK stopword lists are lowercase, so compare against the lowercased token
filtered_sentence = [w for w in tokens if w.lower() not in stop_words]
print(filtered_sentence)

Stemming

from nltk.stem import SnowballStemmer

stemmer = SnowballStemmer("spanish")
stemmed_sentence = [stemmer.stem(w) for w in filtered_sentence]
print(stemmed_sentence)
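
SnowballStemmer supports a number of languages besides Spanish; the class lists them directly:

print(SnowballStemmer.languages)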

Lemmatization

from nltk.stem import WordNetLemmatizer

# Note: WordNetLemmatizer only covers English, so Spanish tokens pass
# through mostly unchanged (see the spaCy section for Spanish lemmas)
lemmatizer = WordNetLemmatizer()
lemmatized_sentence = [lemmatizer.lemmatize(w) for w in filtered_sentence]
print(lemmatized_sentence)
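
On English input the lemmatizer behaves as expected, and a part-of-speech hint improves results; a quick illustrative sketch:

lemmatizer = WordNetLemmatizer()
print(lemmatizer.lemmatize("running", pos="v"))  # -> run
print(lemmatizer.lemmatize("mice"))              # -> mouse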

POS tagging

from nltk import pos_tag

# Note: the default perceptron tagger is trained on English,
# so tags for Spanish tokens will be unreliable
tagged_sentence = pos_tag(tokens)
print(tagged_sentence)

Named Entity Recognition

from nltk import ne_chunk

# ne_chunk expects English POS tags, so results on Spanish text are approximate
named_entities = ne_chunk(tagged_sentence)
print(named_entities)
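
ne_chunk returns an nltk.Tree, so the labeled chunks can be pulled out by walking its subtrees; a minimal sketch:

for subtree in named_entities.subtrees():
    if subtree.label() != 'S':
        entity = ' '.join(token for token, tag in subtree.leaves())
        print(subtree.label(), entity)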

Create a frequency distribution

from nltk.probability import FreqDist

fdist = FreqDist(tokens)
print(fdist)
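
FreqDist also supports direct lookups, for example the most frequent tokens:

print(fdist.most_common(10))  # ten most frequent tokens with their counts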

Plot the frequency distribution

import matplotlib.pyplot as plt

fdist.plot(30, cumulative=False)
plt.show()

Create bigrams

from nltk.util import ngrams

bigrams = ngrams(tokens, 2)
print(list(bigrams))
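
To rank bigrams by association strength instead of just enumerating them, NLTK's collocation finder is the usual next step; a minimal sketch:

from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures

finder = BigramCollocationFinder.from_words(tokens)
print(finder.nbest(BigramAssocMeasures.pmi, 5))  # top 5 bigrams by PMI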

2. spaCy

Install spaCy

!pip install -U spacy
!python -m spacy download es_core_news_md

Load spaCy

import spacy
nlp = spacy.load("es_core_news_md")

Process a sentence and access the tokens

doc = nlp("Este es un ejemplo de texto en español.")
print([(token.text, token.pos_) for token in doc])
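
Each token also carries lemma, dependency, and stopword information, among other attributes; a quick sketch:

for token in doc:
    print(token.text, token.lemma_, token.dep_, token.is_stop)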

Tokenization, lemmatization, and stopword removal

# Tokenization
tokens = [token.text for token in doc]

# Lemmatization
lemmas = [token.lemma_ for token in doc]

# Stopword removal: import the Spanish stopword set explicitly
from spacy.lang.es.stop_words import STOP_WORDS
filtered_tokens = [token.text for token in doc if token.text.lower() not in STOP_WORDS]
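
These steps are often combined into a single pass; a minimal sketch of a preprocessing helper (the function name is illustrative) that keeps lowercased lemmas of alphabetic, non-stopword tokens:

def preprocess(text):
    doc = nlp(text)
    return [token.lemma_.lower() for token in doc
            if token.is_alpha and not token.is_stop]

print(preprocess("Este es un ejemplo de texto en español."))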

Entity recognition and dependency visualization

# Apply the entity recognizer
doc = nlp("El presidente de México, Andrés Manuel López Obrador, visitó la Ciudad de México.")
for ent in doc.ents:
    print(ent.text, ent.label_)

# Visualize the dependency parse (displacy draws the parse tree in a notebook)
from spacy import displacy
displacy.render(doc, style='dep', jupyter=True)
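
displacy can also highlight the recognized entities in place; same call, different style:

displacy.render(doc, style='ent', jupyter=True)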

3. WordCloud

Install WordCloud

!pip install wordcloud

Create a WordCloud

# Import the wordcloud library
from wordcloud import WordCloud

# Load the text into a variable
text = "Este es un ejemplo de texto para crear una nube de palabras"

# Create a WordCloud instance and generate the cloud from the text
wordcloud = WordCloud().generate(text)

# Display the word cloud
import matplotlib.pyplot as plt
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
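
WordCloud takes several constructor options; a sketch with a few common ones (values are illustrative), reusing the spaCy Spanish stopword set imported earlier:

wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    max_words=100,
    stopwords=STOP_WORDS,  # Spanish stopword set from the spaCy section
).generate(text)

plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()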