Ben O'Steen benosteen

## altototxt.py
#python3
import re, os

text_p = re.compile(r"CONTENT=\"([^\"]*)\"", re.U)
line_p = re.compile(r"</TextLine>", re.U)

def get_text(alto_filepath):
  current = ""
  text_content = ""
  words = []

## flatten_book_data.py
# NB: this flattens the data. Some fields (author, pdf, imgs) carry extra data that will be lost if you include them in the export.
# For example, the "author" field carries nuances (creator, editor, etc.) that will be lost.
# Intended for use with https://dx.doi.org/10.21250/DB21
# MIT Licence 2016

import json, csv

EXPORTFILENAME = "book_data.csv"

FIELDS = ['datefield', 'shelfmarks', 'title', 'publisher', 'edition', 'flickr_url_to_book_images', 'place', 'issuance',

## HigherNumberWins.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                benosteen
                / HigherNumberWins.ipynb
            
            
              Last active
              March 4, 2016 17:00
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gist:822fef58c6c92b120fff
import os
from xml.etree import ElementTree as ET
import csv

# Both values below are "/path/to/..." placeholders, not real locations.
OUTPUT = "/path/to/output.csv"
PATH = "/path/to/root/folder/that/has/all/the/xmls"

# NOTE(review): 'id3v2' is listed twice (8th and 11th entries) — confirm
# whether the repeat is intentional or one of them should be a different name.
HEADERS = ['ALBUM', 'TITLE', 'ARTIST', 'GENRE', 'TRACKNUMBER', 'COMMENTS', 'YEAR', 'id3v2', 'PATH', 'FROM_FILENAME', 'id3v2']

# Assuming UTF-8...

## add_numbers.py
import csv
import json

INPUTFILE = "History_Journal_Articles_KW.csv"

OUTPUTFILE = INPUTFILE[:-4] + "_numbered.csv"

in_file = open(INPUTFILE, "r")  # "r" == Open file for reading
out_file = open(OUTPUTFILE, "w") # "w" for writing

## AddBNBDataToHadoop.sh
# For every zip archive in the current directory: unpack it, copy the
# unpacked item into the HDFS directory 'BNB', then remove the local copy.
# Assumes each archive unpacks to one file named after the archive minus its
# extension (foo.zip -> foo) — TODO confirm against the real archives.
for zipfile in *.zip
do
  # When nothing matches, the glob is left as the literal '*.zip'; skip it.
  [ -e "$zipfile" ] || continue
  # Archive name with its last extension stripped.
  unpacked="${zipfile%.*}"
  echo "Unpacking $zipfile"
  unzip "$zipfile"
  echo "Attempting to add $unpacked to HDFS directory 'BNB'"
  hadoop fs -copyFromLocal "$unpacked" "BNB/$unpacked"
  echo "Removing unpacked $unpacked from local directory"
  rm "$unpacked"
done

## UMIDList
0xFFFFFFFFFFFFFFFFFFFFFFFF13002cd712a92f022957058072afFFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF1300002a774a6c02295705804824FFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF13002cd7719d06032957058072afFFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF1300002ae7f04203295705804824FFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF13006953dd884103295705805f91FFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF1300002a2cd2b303295705804824FFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF130069538c8eb703295705805f91FFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF13004ae23fb72004295705803d6dFFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF130018bf5d2d2004295705806785FFFFFFFFFFFF
0xFFFFFFFFFFFFFFFFFFFFFFFF13006953fbe23504295705805f91FFFFFFFFFFFF

## VisualisingWithHTMLColour.py
def bars(html_filename, list_of_stuff):
  """Write an HTML page of thin, full-width coloured bars to html_filename.

  One <div class="bar"> is written per item of list_of_stuff, in order; each
  item is formatted into the inline rule "background-color: #<item>;".
  The file is opened in "w" mode, so existing content is replaced.
  """
  page_open = "<html><head><style>.bar { width: 100%; height: 0.3em; } </style></head><body>"
  bar_markup = '<div class="bar" style="background-color: #{0};">&nbsp;</div>\n'
  page_close = "</body></html>"
  with open(html_filename, "w") as page:
    page.write(page_open)
    # One line of markup per colour, streamed straight to the file.
    page.writelines(bar_markup.format(colour) for colour in list_of_stuff)
    page.write(page_close)

def blocks(html_filename, list_of_stuff):
  with open(html_filename, "w") as htmlfile:
    htmlfile.write("<html><head><style>.block { width: 0.3em; height: 0.3em; float:left; } </style></head><body>")

## BGG_weights.py
#!/usr/bin/env python

USER = "benosteen"
CACHE_FILE = "data.json"

rating_t ="""http://www.boardgamegeek.com/xmlapi/collection/%s?rated=1"""
weight_t = """http://www.boardgamegeek.com/xmlapi/boardgame/%s?stats=1"""

import requests
from xml.etree import ElementTree as ET

## console.log
modprobe snd_bcm2835
====================

Apr 30 23:05:22 raspberrypi kernel: ### snd_bcm2835_alsa_probe c039e448 ############### PROBING FOR bcm2835 ALSA device (0):(1) ###############
Apr 30 23:05:22 raspberrypi kernel: Creating card...
Apr 30 23:05:22 raspberrypi kernel: Creating device/chip ..
Apr 30 23:05:22 raspberrypi kernel: Adding controls ..
Apr 30 23:05:22 raspberrypi kernel: Registering card ....
Apr 30 23:05:22 raspberrypi kernel: bcm2835 ALSA CARD CREATED!
Apr 30 23:05:22 raspberrypi kernel: ### BCM2835 ALSA driver init OK ###
	#python3
	import re, os

	text_p = re.compile(r"CONTENT=\"([^\"]*)\"", re.U)
	line_p = re.compile(r"</TextLine>", re.U)

	def get_text(alto_filepath):
	current = ""
	text_content = ""
	words = []
	# NB: this flattens the data. Some fields (author, pdf, imgs) carry extra data that will be lost if you include them in the export.
	# For example, the "author" field carries nuances (creator, editor, etc.) that will be lost.
	# Intended for use with https://dx.doi.org/10.21250/DB21
	# MIT Licence 2016

	import json, csv

	EXPORTFILENAME = "book_data.csv"

	FIELDS = ['datefield', 'shelfmarks', 'title', 'publisher', 'edition', 'flickr_url_to_book_images', 'place', 'issuance',
	import os
	from xml.etree import ElementTree as ET
	import csv

	OUTPUT = "/path/to/output.csv"
	PATH = "/path/to/root/folder/that/has/all/the/xmls"

	HEADERS = ['ALBUM', 'TITLE', 'ARTIST', 'GENRE', 'TRACKNUMBER', 'COMMENTS', 'YEAR', 'id3v2', 'PATH', 'FROM_FILENAME', 'id3v2']

	# Assuming UTF-8...
	import csv
	import json

	INPUTFILE = "History_Journal_Articles_KW.csv"

	OUTPUTFILE = INPUTFILE[:-4] + "_numbered.csv"

	in_file = open(INPUTFILE, "r") # "r" == Open file for reading
	out_file = open(OUTPUTFILE, "w") # "w" for writing
	# For every zip archive in the current directory: unpack it, copy the
	# unpacked item into the HDFS directory 'BNB', then remove the local copy.
	# Assumes each archive unpacks to one file named after the archive minus its
	# extension (foo.zip -> foo) — TODO confirm against the real archives.
	for zipfile in *.zip
	do
	  # When nothing matches, the glob is left as the literal '*.zip'; skip it.
	  [ -e "$zipfile" ] || continue
	  # Strip the last extension; '%.*' is required here, since '%.' would only
	  # remove a trailing dot and leave the archive name unchanged.
	  unpacked="${zipfile%.*}"
	  echo "Unpacking $zipfile"
	  unzip "$zipfile"
	  echo "Attempting to add $unpacked to HDFS directory 'BNB'"
	  hadoop fs -copyFromLocal "$unpacked" "BNB/$unpacked"
	  echo "Removing unpacked $unpacked from local directory"
	  rm "$unpacked"
	done
	0xFFFFFFFFFFFFFFFFFFFFFFFF13002cd712a92f022957058072afFFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF1300002a774a6c02295705804824FFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF13002cd7719d06032957058072afFFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF1300002ae7f04203295705804824FFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF13006953dd884103295705805f91FFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF1300002a2cd2b303295705804824FFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF130069538c8eb703295705805f91FFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF13004ae23fb72004295705803d6dFFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF130018bf5d2d2004295705806785FFFFFFFFFFFF
	0xFFFFFFFFFFFFFFFFFFFFFFFF13006953fbe23504295705805f91FFFFFFFFFFFF
	def bars(html_filename, list_of_stuff):
	  """Write an HTML page of thin, full-width coloured bars to html_filename.

	  One <div class="bar"> is written per item of list_of_stuff, in order; each
	  item is formatted into the inline rule "background-color: #<item>;".
	  The file is opened in "w" mode, so existing content is replaced.
	  """
	  with open(html_filename, "w") as htmlfile:
	    htmlfile.write("<html><head><style>.bar { width: 100%; height: 0.3em; } </style></head><body>")
	    for instance_number in list_of_stuff:
	      # The div content is a non-breaking space entity, not a bare space.
	      htmlfile.write('<div class="bar" style="background-color: #{0};">&nbsp;</div>\n'.format(instance_number))
	    htmlfile.write("</body></html>")

	def blocks(html_filename, list_of_stuff):
	with open(html_filename, "w") as htmlfile:
	htmlfile.write("<html><head><style>.block { width: 0.3em; height: 0.3em; float:left; } </style></head><body>")
	#!/usr/bin/env python

	USER = "benosteen"
	CACHE_FILE = "data.json"

	rating_t ="""http://www.boardgamegeek.com/xmlapi/collection/%s?rated=1"""
	weight_t = """http://www.boardgamegeek.com/xmlapi/boardgame/%s?stats=1"""

	import requests
	from xml.etree import ElementTree as ET
	modprobe snd_bcm2835
	====================

	Apr 30 23:05:22 raspberrypi kernel: ### snd_bcm2835_alsa_probe c039e448 ############### PROBING FOR bcm2835 ALSA device (0):(1) ###############
	Apr 30 23:05:22 raspberrypi kernel: Creating card...
	Apr 30 23:05:22 raspberrypi kernel: Creating device/chip ..
	Apr 30 23:05:22 raspberrypi kernel: Adding controls ..
	Apr 30 23:05:22 raspberrypi kernel: Registering card ....
	Apr 30 23:05:22 raspberrypi kernel: bcm2835 ALSA CARD CREATED!
	Apr 30 23:05:22 raspberrypi kernel: ### BCM2835 ALSA driver init OK ###