igor-shevchenko/pagerank.py

## pagerank.py
# coding: utf-8
from collections import Counter
import networkx as nx


def get_cities():
    with open('cities.txt') as f:
        for line in f:
            yield line.strip().decode('utf8').lower()


def get_edge(city):
    return city[0], city.strip(u'ъыь')[-1]


def get_weighted_edges(cities):
    counter = Counter(get_edge(city) for city in cities if not city.startswith(u'ы'))
    return [(a, b, float(weight)) for (a, b), weight in counter.iteritems()]


def get_graph():
    cities = get_cities()
    edges = get_weighted_edges(cities)
    g = nx.DiGraph()
    g.add_weighted_edges_from(edges)
    return g


def add_final_vertices(g):
    g.add_weighted_edges_from((v, v + u'_finish', 1.0) for v in g.nodes())


def evaluate_letters():
    g = get_graph()
    add_final_vertices(g)
    pr = nx.pagerank(g)

    for a, b in sorted(pr.items(), key=lambda x: x[1]):
        if a[1:] == '_finish':
            print a[0], b

evaluate_letters()

## shiritori.py
# coding: utf-8
from collections import Counter
from random import choice
import networkx as nx


def get_cities():
    with open('cities.txt') as f:
        for line in f:
            yield line.strip().decode('utf8').lower()


def get_edge(city):
    return city[0], city.strip(u'ъыь')[-1]


def get_weighted_edges(cities):
    counter = Counter(get_edge(city) for city in cities if not city.startswith(u'ы'))
    return [(a, b, float(weight)) for (a, b), weight in counter.iteritems()]


def get_graph():
    cities = get_cities()
    edges = get_weighted_edges(cities)
    g = nx.DiGraph()
    g.add_weighted_edges_from(edges)
    return g


def get_out_weight(graph, vertex):
    return sum(d['weight'] for u, v, d in graph.out_edges(vertex, data=True))


def choose_next_vertex(graph, vertex):
    possible_moves = [n for n in graph.neighbors(vertex) if graph[vertex][n]['weight'] > 0]
    if not possible_moves:
        return None
    scores = min((get_out_weight(graph, n), n) for n in possible_moves)
    return scores[1]


def run_shiritori_algorithm():
    g = get_graph()
    current_letter = choice(g.nodes())
    i = 0
    while True:
        next_letter = choose_next_vertex(g, current_letter)
        if not next_letter:
            print current_letter, i
            break
        g[current_letter][next_letter]['weight'] -= 1
        i += 1
        current_letter = next_letter

run_shiritori_algorithm()

## wikiget.py
# coding: utf-8
import codecs
import mwclient


def get_page_names():
    site = mwclient.Site('ru.wikipedia.org')
    category = site.Pages[u'Категория:Населённые пункты по алфавиту']
    return (page.name for page in category)


def is_letter(c):
    return u'а' <= c.lower() <= u'я'


def starts_and_ends_with_letter(name):
    first = name[0]
    last = name[-1]
    return is_letter(first) and is_letter(last)


def remove_braces(name):
    return name.split('(')[0].strip()


def get_cities():
    used_cities = set()
    for name in get_page_names():
        city = remove_braces(name)
        if starts_and_ends_with_letter(city) and city not in used_cities:
            used_cities.add(city)
            yield city


with codecs.open('cities.txt', 'w', 'utf8') as f:
    for city in get_cities():
        f.write(city + '\n')
	# coding: utf-8
	from collections import Counter
	import networkx as nx


	def get_cities():
	with open('cities.txt') as f:
	for line in f:
	yield line.strip().decode('utf8').lower()


	def get_edge(city):
	return city[0], city.strip(u'ъыь')[-1]


	def get_weighted_edges(cities):
	counter = Counter(get_edge(city) for city in cities if not city.startswith(u'ы'))
	return [(a, b, float(weight)) for (a, b), weight in counter.iteritems()]


	def get_graph():
	cities = get_cities()
	edges = get_weighted_edges(cities)
	g = nx.DiGraph()
	g.add_weighted_edges_from(edges)
	return g


	def add_final_vertices(g):
	g.add_weighted_edges_from((v, v + u'_finish', 1.0) for v in g.nodes())


	def evaluate_letters():
	g = get_graph()
	add_final_vertices(g)
	pr = nx.pagerank(g)

	for a, b in sorted(pr.items(), key=lambda x: x[1]):
	if a[1:] == '_finish':
	print a[0], b

	evaluate_letters()
	# coding: utf-8
	from collections import Counter
	from random import choice
	import networkx as nx


	def get_cities():
	with open('cities.txt') as f:
	for line in f:
	yield line.strip().decode('utf8').lower()


	def get_edge(city):
	return city[0], city.strip(u'ъыь')[-1]


	def get_weighted_edges(cities):
	counter = Counter(get_edge(city) for city in cities if not city.startswith(u'ы'))
	return [(a, b, float(weight)) for (a, b), weight in counter.iteritems()]


	def get_graph():
	cities = get_cities()
	edges = get_weighted_edges(cities)
	g = nx.DiGraph()
	g.add_weighted_edges_from(edges)
	return g


	def get_out_weight(graph, vertex):
	return sum(d['weight'] for u, v, d in graph.out_edges(vertex, data=True))


	def choose_next_vertex(graph, vertex):
	possible_moves = [n for n in graph.neighbors(vertex) if graph[vertex][n]['weight'] > 0]
	if not possible_moves:
	return None
	scores = min((get_out_weight(graph, n), n) for n in possible_moves)
	return scores[1]


	def run_shiritori_algorithm():
	g = get_graph()
	current_letter = choice(g.nodes())
	i = 0
	while True:
	next_letter = choose_next_vertex(g, current_letter)
	if not next_letter:
	print current_letter, i
	break
	g[current_letter][next_letter]['weight'] -= 1
	i += 1
	current_letter = next_letter

	run_shiritori_algorithm()
	# coding: utf-8
	import codecs
	import mwclient


	def get_page_names():
	site = mwclient.Site('ru.wikipedia.org')
	category = site.Pages[u'Категория:Населённые пункты по алфавиту']
	return (page.name for page in category)


	def is_letter(c):
	return u'а' <= c.lower() <= u'я'


	def starts_and_ends_with_letter(name):
	first = name[0]
	last = name[-1]
	return is_letter(first) and is_letter(last)


	def remove_braces(name):
	return name.split('(')[0].strip()


	def get_cities():
	used_cities = set()
	for name in get_page_names():
	city = remove_braces(name)
	if starts_and_ends_with_letter(city) and city not in used_cities:
	used_cities.add(city)
	yield city


	with codecs.open('cities.txt', 'w', 'utf8') as f:
	for city in get_cities():
	f.write(city + '\n')