# yubessy/mytextrank.py

## mytextrank.py
# -*- coding: utf-8 -*-
# third-party lib
import networkx


def blank_split(text):
    """Tokenize ``text`` by splitting it on whitespace.

    Delegates to ``text.split()`` with no separator argument, so a run of
    consecutive whitespace counts as a single delimiter and leading or
    trailing whitespace produces no empty tokens.

    Args:
        text: Object exposing a ``split()`` method (normally a string).

    Returns:
        The result of ``text.split()``; for a string, a list of tokens.
    """
    tokens = text.split()
    return tokens


def ngram(words, window=2):
    """Build the list of contiguous n-grams of ``words``.

    Args:
        words: Sequence supporting ``len()`` and slicing.
        window: Number of items in each n-gram (defaults to 2).

    Returns:
        A list of tuples, one for every start index from ``0`` through
        ``len(words) - window`` in order. The list is empty when ``words``
        holds fewer than ``window`` items.
    """
    last_start = len(words) - window
    return [
        tuple(words[start:start + window])
        for start in range(last_start + 1)
    ]


def combination(words):
    """Return every distinct pair of unequal items drawn from ``words``.

    A pair ``(x, y)`` is kept only when ``x > y``, so each unordered pair is
    reported exactly once with the larger item first, and an item is never
    paired with an equal item. Repeated items in ``words`` do not produce
    repeated pairs.

    Args:
        words: Re-iterable collection of hashable, mutually comparable
            items (it is traversed in a nested loop).

    Returns:
        A list of ``(x, y)`` tuples in ascending order.
    """
    pairs = {(x, y) for x in words for y in words if x > y}
    # Set iteration order follows hashing, which is randomised per process
    # for strings; sorting gives callers a reproducible order.
    return sorted(pairs)


def textgraph(text, tokenizer=blank_split, window=2):
    """Build a word co-occurrence graph from ``text``.

    The text is tokenized, cut into n-grams of ``window`` tokens, and for
    every n-gram each pair produced by ``combination`` is added as an edge
    of a ``networkx.Graph``. Only edges are inserted, so a token that never
    shares an n-gram with a different token does not appear in the graph.

    Args:
        text: Input passed unchanged to ``tokenizer``.
        tokenizer: Callable turning ``text`` into a sequence of tokens
            (defaults to ``blank_split``).
        window: N-gram size handed to ``ngram`` (defaults to 2).

    Returns:
        The populated ``networkx.Graph``.
    """
    tokens = tokenizer(text)
    cooccurrence = networkx.Graph()
    for span in ngram(tokens, window):
        cooccurrence.add_edges_from(combination(span))
    return cooccurrence


def textrank(graph):
    """Score the nodes of ``graph`` with PageRank.

    Args:
        graph: Graph accepted by ``networkx.pagerank``.

    Returns:
        The result of ``networkx.pagerank(graph)`` called with its default
        parameters.
    """
    scores = networkx.pagerank(graph)
    return scores
	# -*- coding: utf-8 -*-
	# third-party lib
	import networkx


	def blank_split(text):
		"""Tokenize ``text`` by splitting it on whitespace.

		Delegates to ``text.split()`` with no separator argument, so a run of
		consecutive whitespace counts as a single delimiter and leading or
		trailing whitespace produces no empty tokens.

		Args:
			text: Object exposing a ``split()`` method (normally a string).

		Returns:
			The result of ``text.split()``; for a string, a list of tokens.
		"""
		return text.split()


	def ngram(words, window=2):
		"""Build the list of contiguous n-grams of ``words``.

		Args:
			words: Sequence supporting ``len()`` and slicing.
			window: Number of items in each n-gram (defaults to 2).

		Returns:
			A list of tuples, one for every start index from ``0`` through
			``len(words) - window`` in order. The list is empty when
			``words`` holds fewer than ``window`` items.
		"""
		return [
			tuple(words[i:i + window])
			for i in range(len(words) - window + 1)
		]


	def combination(words):
		"""Return every distinct pair of unequal items drawn from ``words``.

		A pair ``(x, y)`` is kept only when ``x > y``, so each unordered pair
		is reported exactly once with the larger item first, and an item is
		never paired with an equal item. Repeated items in ``words`` do not
		produce repeated pairs.

		Args:
			words: Re-iterable collection of hashable, mutually comparable
				items (it is traversed in a nested loop).

		Returns:
			A list of ``(x, y)`` tuples in ascending order.
		"""
		pairs = {(x, y) for x in words for y in words if x > y}
		# Set iteration order follows hashing, which is randomised per
		# process for strings; sorting gives callers a reproducible order.
		return sorted(pairs)


	def textgraph(text, tokenizer=blank_split, window=2):
		"""Build a word co-occurrence graph from ``text``.

		The text is tokenized, cut into n-grams of ``window`` tokens, and for
		every n-gram each pair produced by ``combination`` is added as an
		edge of a ``networkx.Graph``. Only edges are inserted, so a token
		that never shares an n-gram with a different token does not appear
		in the graph.

		Args:
			text: Input passed unchanged to ``tokenizer``.
			tokenizer: Callable turning ``text`` into a sequence of tokens
				(defaults to ``blank_split``).
			window: N-gram size handed to ``ngram`` (defaults to 2).

		Returns:
			The populated ``networkx.Graph``.
		"""
		words = tokenizer(text)
		graph = networkx.Graph()
		for ng_unit in ngram(words, window):
			graph.add_edges_from(combination(ng_unit))
		return graph


	def textrank(graph):
		"""Score the nodes of ``graph`` with PageRank.

		Args:
			graph: Graph accepted by ``networkx.pagerank``.

		Returns:
			The result of ``networkx.pagerank(graph)`` called with its
			default parameters.
		"""
		return networkx.pagerank(graph)