Skip to content

Instantly share code, notes, and snippets.

@yubessy
Created June 29, 2014 03:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yubessy/7c590984bb19b89b8c6d to your computer and use it in GitHub Desktop.
Save yubessy/7c590984bb19b89b8c6d to your computer and use it in GitHub Desktop.
TextRank
# -*- coding: utf-8 -*-
# third-party lib
import networkx
def blank_split(text):
    """Split text into a list of words on whitespace.

    Uses ``str.split()`` with no separator, so any run of whitespace
    (spaces, tabs, newlines) acts as a single delimiter and leading or
    trailing whitespace never produces empty words.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the whitespace-separated words; empty if ``text`` is
        empty or contains only whitespace.
    """
    return text.split()
def ngram(words, window=2):
    """Build the list of contiguous N-grams from a sequence of words.

    Args:
        words: A sliceable sequence of words (e.g. a list or tuple).
        window: Number of words per N-gram. Defaults to 2.

    Returns:
        A list of tuples, each holding ``window`` consecutive words, in
        their order of appearance. The list is empty when ``words`` has
        fewer than ``window`` items or when ``window`` is less than 1.
    """
    # A non-positive window has no meaningful N-gram; without this guard
    # the slices below would yield empty tuples or arbitrary sub-ranges.
    if window < 1:
        return []
    # range() is empty when len(words) < window, so short inputs give [].
    return [
        tuple(words[start:start + window])
        for start in range(len(words) - window + 1)
    ]
def combination(words):
    """Build the unique pairs of distinct words from a collection.

    Each pair ``(x, y)`` satisfies ``x > y``, so every unordered pair of
    differing words appears exactly once and a word is never paired with
    itself. Duplicate words in the input do not produce duplicate pairs.

    Args:
        words: An iterable of hashable, mutually comparable items that
            can be iterated more than once (e.g. a list or tuple).

    Returns:
        A sorted list of ``(x, y)`` tuples. Sorting makes the output
        deterministic instead of depending on set iteration order.
    """
    unique_pairs = {(x, y) for x in words for y in words if x > y}
    return sorted(unique_pairs)
def textgraph(text, tokenizer=blank_split, window=2):
    """Build a word co-occurrence graph from a text.

    The text is tokenized, cut into N-grams of ``window`` words, and for
    every N-gram each pair of distinct words in it is added as an edge.

    Args:
        text: The input text.
        tokenizer: Callable that turns ``text`` into a sequence of words.
            Defaults to ``blank_split``.
        window: Size of the co-occurrence window passed to ``ngram``.
            Defaults to 2.

    Returns:
        A ``networkx.Graph`` whose edges link words that share a window.
        Nodes are only created through edges, so a word that never
        co-occurs with a different word (for example when the text is
        shorter than ``window``) does not appear in the graph.
    """
    words = tokenizer(text)
    graph = networkx.Graph()
    for ng_unit in ngram(words, window):
        # Link every pair of distinct words found inside this window.
        graph.add_edges_from(combination(ng_unit))
    return graph
def textrank(graph):
    """Compute PageRank scores for the nodes of a graph.

    This is a thin wrapper around ``networkx.pagerank`` called with its
    default parameters.

    Args:
        graph: A networkx graph, e.g. the one built by ``textgraph``.

    Returns:
        The result of ``networkx.pagerank(graph)``.
    """
    return networkx.pagerank(graph)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment