This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_random_walks(page_ids, adjacency_list, num_walks=10, len_walks=20): | |
""" | |
convenience method to generate a list of numWalks random walks. This saves a random walk in targetPath. | |
:param page_ids: an RDD of page ids for which the random walks should be generated. | |
:param adjacency_list: a simple RDD with tuples of the form (page_id, [list(id)]). | |
:param num_walks: optional. The number of walks, which are to be generated for each page id. | |
:param len_walks: optional. The maximum length of each walk. | |
:return: a RDD of random walks | |
""" | |
walkers = page_ids.flatMap(lambda page_id: [(page_id, [page_id])] * num_walks) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create one figure with a single axes and pin the visible data window
# to x in [0, 2] and y in [-2, 2].
fig, ax = plt.subplots()
ax.set_xlim(0, 2)
ax.set_ylim(-2, 2)

# Three empty line artists of width 2, created in order. Each plot() call
# returns a one-element list, so take its only entry.
# NOTE(review): presumably these are filled with data later on; that code is
# not visible in this excerpt.
line1, line2, line3 = (ax.plot([], [], lw=2)[0] for _ in range(3))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Builds Polybar on Fedora-based systems.
# Tested on Fedora 25, Fedora 26 and Korora 25; untested on others.
# To get started:
#
#   wget -O- https://gist.githubusercontent.com/nathanchere/22491daf4f917b100a35e5c284a5fec5/raw/install-polybar-ex.sh | bash

# Build dependencies: one dnf group (the '@' entry) plus individual packages,
# all requested in a single non-interactive install.
build_deps=(
  cmake
  @development-tools
  gcc-c++
  i3-ipc
  jsoncpp-devel
  alsa-lib-devel
  wireless-tools-devel
  libmpdclient-devel
  libcurl-devel
  cairo-devel
  xcb-proto
  xcb-util-devel
  xcb-util-wm-devel
  xcb-util-image-devel
)
sudo dnf install -y "${build_deps[@]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas
import numpy as np
from sklearn.preprocessing import normalize

# Load any set of word embeddings as a {word: vector} mapping. The single
# entry below only illustrates the expected layout.
# Fix: the np.array(...) call was missing its closing parenthesis, which made
# the whole script a syntax error.
vecs = {"word": np.array([0, 1, 2, 3, 4, 5])}

# Load the word2vec analogies: each space-separated row is read into the
# four columns a, b, c, d.
analogies = pandas.read_csv("questions-words.txt", names=["a", "b", "c", "d"], sep=" ")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import struct | |
from datetime import datetime | |
with open(sys.argv[1]) as file: | |
outfile = open("wiki_" + sys.argv[1], "w") | |
lines = [line.strip() for line in file.readlines()[1:]] | |
records = [lines[3*i:3*i+3] for i in range(len(lines) / 3)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
for i in indiana/*.tar.gz; do | |
echo $i | |
sudo tar xf $i -C indianatmp/ | |
cd indianatmp | |
sudo chmod 644 * | |
for j in 20*; do | |
cleanname=$(echo wiki_$j | sed s/:/_/g | sed s/+/_/g) | |
../filter_indy.py $j && rm $j && mv wiki_$j $cleanname && hdfs dfs -put $cleanname /data/indiana/click/ & |