View NC-HB2-ids.sh
# This script requires the jq utility | |
# https://stedolan.github.io/jq/ | |
# Datasets created with twarc | |
# https://github.com/DocNow/twarc | |
mkdir -p NCHB2-ids | |
rm NCHB2-ids/NCHB2* | |
touch NCHB2-ids/NCHB2-ids-with-dupes.txt | |
# Create a more relevant subset of the "North Carolina" search |
View code4lib-vote
#!/usr/bin/env ruby | |
# To add this to cron do something like this to use the ruby wrapper script: | |
# */15 8-17 * * 1-5 env DISPLAY=:0.0 /home/jnronall/.rvm/wrappers/ruby-2.1.1/ruby /home/jnronall/bin/code4lib-vote > $HOME/tmp/code4lib-vote-cron.log 2>&1 | |
require 'httpclient' | |
require 'json' | |
require 'date' | |
require 'libnotify' | |
require 'slop' |
View net_ldap_overrides.rb
class Net::LDAP | |
def initialize(args = {}) | |
@host = args[:host] || DefaultHost | |
@port = args[:port] || DefaultPort | |
@verbose = false # Make this configurable with a switch on the class. | |
@auth = args[:auth] || DefaultAuth | |
@base = args[:base] || DefaultTreebase | |
encryption args[:encryption] # may be nil |
View commandline use
$ ~/.cabal/bin/pandoc --version | |
pandoc 1.12.3.3 | |
$ ~/.cabal/bin/pandoc -w dzslides --standalone --self-contained ~/tmp/pandoc-poster-image-test.md > ~/tmp/pandoc-poster-image-test.html |
View dzslides2pdf.rb
#! /usr/bin/env ruby | |
# dzslides2pdf.rb | |
# dzslides2pdf.rb http://localhost/presentation_root presentation.html | |
require 'capybara/dsl' | |
require 'capybara-webkit' | |
# require 'capybara/poltergeist' | |
require 'fileutils' | |
include Capybara::DSL |
View dabblet.css
/** | |
* "Google Now" Card | |
*/ | |
body { | |
background: #e1e1e1; | |
min-height: 100%; | |
margin: auto; | |
} | |
ul.gNow { | |
width: 450px; |
View item.json
{ | |
"type": [ | |
"http:\/\/schema.org\/Organization" | |
], | |
"properties": { | |
"name": [ | |
"Riverdale" | |
], | |
"url": [ | |
"http:\/\/d.lib.ncsu.edu\/collections\/catalog?f%5Bnames_facet%5D%5B%5D=Riverdale" |
View tesse
#!/usr/bin/env ruby | |
# tesse: commandline tool for looking at tesseract OCR and cleaning the output | |
# Besides the gems required below, it also needs the following Linux programs: | |
# eog: for viewing the images | |
# wmctrl: for resizing and positioning the image viewing window | |
require 'tesseract' | |
require 'ffi/aspell' |
View get_and_process_webdatacommons_data.sh
#!/usr/bin/env bash | |
# These steps will take a long time to download the data set. | |
# First, get the list of available NQuad files to download. | |
wget http://webdatacommons.org/2012-08/stats/files.list | |
# We're only interested in the microdata set right now since that seems to be where schema.org/Book is used more, so create a list of just the html-microdata files. | |
cat files.list | grep html-microdata > microdata_files.list | |
# OK, this will take a while depending on your connection. Let it run overnight. | |
wget -i microdata_files.list |
View common_crawl_hostname_count.rb
#!/usr/bin/env ruby | |
# a quick, simple script to partially parse output from https://github.com/trivio/common_crawl_index/blob/master/bin/remote_read | |
# and output subdomains in order of count | |
url_counts = {} | |
total_urls = 0 | |
File.readlines(ARGV[0]).each do |line| | |
url = line.split(' ').first | |
reverse_hostname = url.split('/').first |
NewerOlder