Thomas Niebler thomasniebler

## gist:03c85200aecb55c256ce152352fa46f9
def generate_random_walks(page_ids, adjacency_list, num_walks=10, len_walks=20):
    """
    Convenience method to generate ``num_walks`` random walks for every page id.

    NOTE(review): only the walker-seeding step is visible in this excerpt. The
    original description also mentioned saving a walk to a target path, but no
    such parameter exists in this signature and no saving code is shown --
    confirm against the full source.

    :param page_ids: an RDD of page ids for which the random walks should be generated.
    :param adjacency_list: a simple RDD with tuples of the form (page_id, [list(id)]).
        Not used in the visible part of the function.
    :param num_walks: optional. The number of walks which are to be generated for each page id.
    :param len_walks: optional. The maximum length of each walk.
        Not used in the visible part of the function.
    :return: an RDD of random walks (as described by the author; no return
        statement is visible in this excerpt).
    """
    # Seed the walkers: each page id yields num_walks (start_id, [start_id]) pairs.
    # List multiplication repeats references to ONE tuple, so the num_walks
    # entries produced for a page share the same inner list object.
    walkers = page_ids.flatMap(lambda page_id: [(page_id, [page_id])] * num_walks)

## prepare_indy.sh
#!/bin/bash

# Walk over every tarball in indiana/. The excerpt is cut off before the
# closing `done` lines of both loops.
for i in indiana/*.tar.gz; do
        echo $i
        # Extract the archive into the directory indianatmp/ and change into it.
        sudo tar xf $i -C indianatmp/
        cd indianatmp
        # Set mode 644 on everything in the directory.
        sudo chmod 644 *
        # Handle every entry whose name starts with "20".
        for j in 20*; do
                # Output name "wiki_<file>" with every ':' and '+' replaced by '_'.
                cleanname=$(echo wiki_$j | sed s/:/_/g | sed s/+/_/g)
                # Run the filter script on the file; if each step succeeds, delete the
                # input, rename wiki_<file> to the cleaned name and put it into HDFS.
                # The trailing & sends the whole chain to the background.
                ../filter_indy.py $j && rm $j && mv wiki_$j $cleanname && hdfs dfs -put $cleanname /data/indiana/click/  &

## filter_indy.py
#!/usr/bin/env python

import sys
import struct
from datetime import datetime

# Read the file named by the first command-line argument and group its lines
# into records of three. (The excerpt ends here; later processing is not shown.)
with open(sys.argv[1]) as file:
        # The output file is named "wiki_" + the input argument.
        outfile = open("wiki_" + sys.argv[1], "w")
        # Skip the first line, then strip surrounding whitespace from the rest.
        lines = [line.strip() for line in file.readlines()[1:]]
        # One record = three consecutive lines; trailing lines that do not fill a
        # complete record are dropped. Floor division keeps the count an int:
        # on Python 3 "/" returns a float, which range() rejects with TypeError.
        records = [lines[3*i:3*i+3] for i in range(len(lines) // 3)]

## analogyeval.py
import pandas
import numpy as np
from sklearn.preprocessing import normalize

# load any set of word embeddings like this: a dict mapping a word to its numpy vector
vecs = {"word": np.array([0, 1, 2, 3, 4, 5])}
# load the word2vec analogies; each line is read as four space-separated
# columns named a, b, c and d
analogies = pandas.read_csv("questions-words.txt", names=["a", "b", "c", "d"], sep=" ")


## install-polybar-ex.sh
#!/usr/bin/env bash

# Builds Polybar on Fedora-based systems
# Tested on Fedora 25, Fedora 26 and Korora 25, untested on others
# To get started:
#
# wget -O- https://gist.githubusercontent.com/nathanchere/22491daf4f917b100a35e5c284a5fec5/raw/install-polybar-ex.sh | bash

# Install the build tools and development packages with a single dnf call.
sudo dnf install -y \
    cmake @development-tools gcc-c++ \
    i3-ipc jsoncpp-devel \
    alsa-lib-devel wireless-tools-devel libmpdclient-devel libcurl-devel \
    cairo-devel \
    xcb-proto xcb-util-devel xcb-util-wm-devel xcb-util-image-devel

## simple_animation_plot.py
# Create the figure together with a single axes object.
fig, ax = plt.subplots()

# Fix the visible range: x from 0 to 2, y from -2 to 2.
ax.set_xlim((0, 2))
ax.set_ylim((-2, 2))

# Three empty line artists of width 2, created in the order line1, line2, line3.
line1, line2, line3 = (ax.plot([], [], lw=2)[0] for _ in range(3))
	def generate_random_walks(page_ids, adjacency_list, num_walks=10, len_walks=20):
	    """
	    Convenience method to generate ``num_walks`` random walks for every page id.

	    NOTE(review): only the walker-seeding step is visible in this excerpt. The
	    original description also mentioned saving a walk to a target path, but no
	    such parameter exists in this signature -- confirm against the full source.

	    :param page_ids: an RDD of page ids for which the random walks should be generated.
	    :param adjacency_list: a simple RDD with tuples of the form (page_id, [list(id)]).
	        Not used in the visible part of the function.
	    :param num_walks: optional. The number of walks which are to be generated for each page id.
	    :param len_walks: optional. The maximum length of each walk.
	        Not used in the visible part of the function.
	    :return: an RDD of random walks (as described by the author; no return
	        statement is visible in this excerpt).
	    """
	    # Seed the walkers: each page id yields num_walks (start_id, [start_id]) pairs.
	    walkers = page_ids.flatMap(lambda page_id: [(page_id, [page_id])] * num_walks)
	#!/bin/bash

	# Walk over every tarball in indiana/. The excerpt is cut off before the
	# closing `done` lines of both loops.
	for i in indiana/*.tar.gz; do
	    echo $i
	    # Extract the archive into the directory indianatmp/ and change into it.
	    sudo tar xf $i -C indianatmp/
	    cd indianatmp
	    sudo chmod 644 *
	    for j in 20*; do
	        # Real pipes (not the escaped "\|", which would hand a literal "|" to
	        # echo): replace every ':' and '+' in "wiki_<file>" with '_'.
	        cleanname=$(echo wiki_$j | sed s/:/_/g | sed s/+/_/g)
	        # Filter the file; if each step succeeds, delete the input, rename
	        # wiki_<file> to the cleaned name and put it into HDFS. The trailing &
	        # sends the whole chain to the background.
	        ../filter_indy.py $j && rm $j && mv wiki_$j $cleanname && hdfs dfs -put $cleanname /data/indiana/click/ &
	#!/usr/bin/env python

	import sys
	import struct
	from datetime import datetime

	# Read the file named by the first command-line argument and group its lines
	# into records of three. (The excerpt ends here; later processing is not shown.)
	with open(sys.argv[1]) as file:
	    # The output file is named "wiki_" + the input argument.
	    outfile = open("wiki_" + sys.argv[1], "w")
	    # Skip the first line, then strip surrounding whitespace from the rest.
	    lines = [line.strip() for line in file.readlines()[1:]]
	    # One record = three consecutive lines (slice 3*i .. 3*i+3). Floor division
	    # keeps the count an int: on Python 3 "/" returns a float, which range()
	    # rejects with TypeError.
	    records = [lines[3*i:3*i+3] for i in range(len(lines) // 3)]
	import pandas
	import numpy as np
	from sklearn.preprocessing import normalize

	# load any set of word embeddings like this: a dict mapping a word to its numpy vector
	vecs = {"word": np.array([0, 1, 2, 3, 4, 5])}
	# load the word2vec analogies; each line is read as four space-separated
	# columns named a, b, c and d
	analogies = pandas.read_csv("questions-words.txt", names=["a", "b", "c", "d"], sep=" ")
	#!/usr/bin/env bash

	# Builds Polybar on Fedora-based systems
	# Tested on Fedora 25, Fedora 26 and Korora 25, untested on others
	# To get started:
	#
	# wget -O- https://gist.githubusercontent.com/nathanchere/22491daf4f917b100a35e5c284a5fec5/raw/install-polybar-ex.sh | bash

	# Install the build tools and development packages with a single dnf call.
	sudo dnf install -y \
	    cmake @development-tools gcc-c++ \
	    i3-ipc jsoncpp-devel \
	    alsa-lib-devel wireless-tools-devel libmpdclient-devel libcurl-devel \
	    cairo-devel \
	    xcb-proto xcb-util-devel xcb-util-wm-devel xcb-util-image-devel
	# Create the figure together with a single axes object.
	fig, ax = plt.subplots()

	# Fix the visible range: x from 0 to 2, y from -2 to 2.
	ax.set_xlim((0, 2))
	ax.set_ylim((-2, 2))

	# Three empty line artists of width 2, created in the order line1, line2, line3.
	line1, line2, line3 = (ax.plot([], [], lw=2)[0] for _ in range(3))