Skip to content

Instantly share code, notes, and snippets.

@mooreniemi
mooreniemi / combine.lua
Last active November 22, 2020 01:21 — forked from RavenZZ/combine.lua
combine two requests with OpenResty
-- Combine requests with OpenResty: read the JSON request body and iterate
-- over its entries (fragment — the loop body and closing `end`s are cut off).
-- NOTE(review): `cjson` is assigned without `local`, creating a global;
-- prefer `local cjson = require "cjson"`.
cjson = require "cjson"
ngx. req. read_body() -- explicitly read the req body
local data = ngx. req. get_body_data()
-- NOTE(review): decode runs BEFORE the `if data then` guard below; when the
-- request has no body, `data` is nil and cjson.decode(nil) raises an error.
-- Consider moving this line inside the `if data then` branch.
local jsonBody = cjson. decode(data);
if data then
local reqs = { }
--ngx. say("body data:")
-- iterate over the top-level keys of the decoded JSON (pairs order is unspecified)
for key, value in pairs(jsonBody) do
--ngx.print(key)
# a script to load seshat data into Elasticsearch
# seshat data is clearly graphdb data, but ES is nifty, so...
# assumes you have set up elasticsearch and kibana
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk
# assumes local ES
# Elasticsearch() with no arguments connects to localhost:9200 by default.
# (fragment — the code that reads the data and calls parallel_bulk is cut off)
es = Elasticsearch()
# This is a version of 'Learning from Clicks' in 'Programming Collective
# Intelligence' p74, a pdf is online at the below url
# https://github.com/ShawnLeee/the-book/blob/master/pybooks/Programming%20Collective%20Intelligence.pdf
#
# This is a fully connected network stored in sqlite, so, yeah, not for Production. :)
from math import tanh
from timeit import default_timer as timer
#! /bin/bash
# Pick a random wallpaper from a fixed directory (fragment — the code that
# uses $number to select and set the wallpaper is cut off below).
# http://unix.stackexchange.com/questions/230238/starting-x-applications-from-the-terminal-and-the-warnings-that-follow
# Silence harmless AT-SPI/D-Bus warnings from GTK apps started in a terminal.
export NO_AT_BRIDGE=1
WALLPAPERS="/home/alex/Dropbox/backgrounds"
# NOTE(review): the unquoted `ls` expansion word-splits on spaces in
# filenames; a glob such as ALIST=( "$WALLPAPERS"/* ) would be safer.
ALIST=( `ls -w1 $WALLPAPERS` )
# Number of wallpapers found.
RANGE=${#ALIST[@]}
# NOTE(review): $RANDOM is not reduced modulo $RANGE here — presumably the
# truncated remainder does `number %= $RANGE` before indexing ALIST; verify.
let "number = $RANDOM"
# This file has been auto-generated by i3-config-wizard(1).
# It will not be overwritten, so edit it as you like.
#
# Should you change your keyboard layout some time, delete
# this file and re-run i3-config-wizard(1).
#
# i3 config file (v4)
#
# Please see http://i3wm.org/docs/userguide.html for a complete reference!
! Unmap capslock
! (keycode 66 is Caps Lock on standard layouts; first strip its Lock behavior)
clear Lock
! Rebind the physical Caps Lock key to emit Hyper_L instead.
keycode 66 = Hyper_L
!
! leave mod4 as only super
! (Hyper_L commonly shares mod4 with Super; detach it so Super stands alone)
remove mod4 = Hyper_L
!
! Attach Hyper_L to mod3 so it is usable as an independent modifier.
add mod3 = Hyper_L
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00000-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00001-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00002-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00003-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00004-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00005-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00006-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-20/segments/1368696381249/wet/CC-MAIN-20130516092621-00007-ip-10-60-113-184.ec2.internal.warc.wet.gz
crawl-data/CC-MAIN-2013-
CC-MAIN-2019-51/
CC-MAIN-2019-47/
CC-MAIN-2019-43/
CC-MAIN-2019-39/
CC-MAIN-2019-35/
CC-MAIN-2019-30/
CC-MAIN-2019-26/
CC-MAIN-2019-22/
CC-MAIN-2019-18/
CC-MAIN-2019-13/
from heapq import nsmallest, nlargest, heappush, heapreplace
from random import randrange, seed
from copy import deepcopy
# Taken directly from https://trevorcohn.github.io/comp90042/slides/WSTA_L3_IR.pdf 26+
# Toy postings lists for the IR slide examples: term -> ascending doc-id list.
p_lists = dict(
    the=[2, 3, 7, 8, 9, 10, 11, 12, 13, 17, 18, 19],
    quick=[5, 6, 11, 14, 18],
    brown=[2, 4, 5, 15, 42, 84, 96],
    fox=[5, 7, 8, 13],
)
import random
import spacy
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from numpy.linalg import norm
from statistics import mean
# use spaCy for nlp processing
print('spacy loading')
# NOTE(review): the 'en' shortcut link was removed in spaCy 3.x; on modern
# installs this needs an explicit model name like 'en_core_web_sm' — verify
# the spaCy version this gist (last active 2020) targets.
nlp = spacy.load('en')