Skip to content
Create a gist now

Instantly share code, notes, and snippets.

Embed URL


Subversion checkout URL

You can clone with
Download ZIP
PageRank for text summarization
import networkx as nx
import numpy as np
from nltk.tokenize.punkt import PunktSentenceTokenizer
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
def textrank(document):
sentence_tokenizer = PunktSentenceTokenizer()
sentences = sentence_tokenizer.tokenize(document)
bow_matrix = CountVectorizer().fit_transform(sentences)
normalized = TfidfTransformer().fit_transform(bow_matrix)
similarity_graph = normalized * normalized.T
nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
scores = nx.pagerank(nx_graph)
return sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.