-
-
Save NielsMinssen/fd7cf09b208329a1a41e322291a821b8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the French spaCy pipeline used for lemmatization.
nlp = spacy.load('fr_core_news_md')

# Join the cleaned words into one space-separated string and run the pipeline
# over it.
# NOTE(review): this rebinds `clean_words` to the object returned by nlp()
# (presumably it was a list of strings before - confirm). The rebinding is
# kept on purpose in case later code relies on it.
clean_words = nlp(" ".join(clean_words))

# Collect the lemma of every token produced by the pipeline.
clean_words_lem = [w.lemma_ for w in clean_words]

# Print the k most common n-grams of the lemmatized text; here the 100 most
# frequent trigrams.
n = 3
k = 100
ngram = list(nltk.ngrams(clean_words_lem, n))
fdist = FreqDist(ngram)
print(fdist.most_common(k))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment