public
Created

PageRank for text summarization

  • Download Gist
TextRank.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
import networkx as nx
import numpy as np
from nltk.tokenize.punkt import PunktSentenceTokenizer
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
def textrank(document):
sentence_tokenizer = PunktSentenceTokenizer()
sentences = sentence_tokenizer.tokenize(document)
bow_matrix = CountVectorizer().fit_transform(sentences)
normalized = TfidfTransformer().fit_transform(bow_matrix)
similarity_graph = normalized * normalized.T
nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
scores = nx.pagerank(nx_graph)
return sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.