Last active
April 10, 2017 10:14
-
-
Save inukshuk/f1d47aeab1f778bca8ce to your computer and use it in GitHub Desktop.
Citation Style Predictor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'csl/styles' | |
require 'citeproc/ruby' | |
require 'httparty' | |
require 'thread' | |
# Access token for the anystyle.io parsing API.
API_KEY = '' # Add your key here!

# Without a key every API request would be rejected, so stop right away.
# Use `abort` (message on stderr, exit status 1) rather than a bare `exit`,
# which would terminate silently with a success status and leave the user
# guessing why nothing happened.
if API_KEY.empty?
  abort 'No API key configured: set API_KEY at the top of this file.'
end
# Lazily populated cache of CSL styles keyed by style id: the first lookup of
# an id loads the style via CSL::Style.load and memoizes the result.
STYLES = Hash.new do |cache, style_id|
  cache[style_id] = CSL::Style.load(style_id)
end

# Warm the cache in a background thread by touching every available style id.
# The thread is never joined, so the main program continues immediately.
Thread.new { CSL::Style.ls.each { |style_id| STYLES[style_id] } }
# Predicts which CSL citation style was used to format a reference.
#
# Each reference is sent to the anystyle.io parsing service, the parsed data
# is re-rendered with every style id listed by CSL::Style.ls, and the styles
# are ranked by the Levenshtein distance between each rendition and the
# original reference text (smaller is better).
class Predictor
  include HTTParty

  # Explicit HTTPS: without a scheme HTTParty talks plain HTTP, which would
  # send the access token in clear text.
  base_uri 'https://anystyle.io'

  attr_reader :locale, :styles, :limit, :workers

  # @param token [String] anystyle.io access token
  # @param locale [String] locale handed to the CiteProc renderer
  # @param limit [Integer] number of best matches printed per reference
  # @param workers [Integer] number of rendering threads
  def initialize(token, locale = 'en', limit = 10, workers = 4)
    @token = token
    @locale = locale
    @limit = limit
    @workers = workers
    @styles = CSL::Style.ls
  end

  # Parses the given reference(s) remotely and prints, for each one, the
  # +limit+ styles whose rendition is closest to the input.
  #
  # @param references [String, Array<String>] one or more reference strings
  # @return [Array] the (reference, parsed data) pairs that were processed
  # @raise [RuntimeError] if the parse request does not return HTTP 200
  def predict(references)
    parse(references).each do |reference, data|
      reference = reference.strip

      # zip pads with nil when the service returns fewer items than were
      # sent; rendering nil would fail once per style, so skip up front.
      if data.nil?
        warn "No parse result for reference: #{reference}"
        next
      end

      print "Computing distances..."
      distances = compute_distances(reference, data)
      puts 'done'

      print 'Sorting distances...'
      # Tie-break on the style id so that the ranking does not depend on the
      # order in which worker threads happened to finish.
      distances.sort_by! { |id, _rendition, d| [d, id] }
      puts 'done'

      report(distances)
    end
  end

  private

  # Renders +data+ with every known style on +workers+ threads.
  #
  # @return [Array<Array>] unsorted [style id, rendition, distance] triples
  def compute_distances(reference, data)
    # A Queue hands out each id exactly once; checking Array#empty? and then
    # calling Array#shift from several threads is a check-then-act race that
    # can yield nil.
    queue = Queue.new
    styles.each { |id| queue << id }

    distances = []
    lock = Mutex.new

    threads = Array.new(workers) do
      Thread.new do
        renderer = CiteProc::Ruby::Renderer.new locale: locale, format: 'text'
        counter = 0

        while (id = next_style(queue))
          # The rendition is computed afresh for every style. Keeping it in a
          # loop-level local would carry the previous style's text over to a
          # style that fails to render.
          rendition = render(renderer, id, data)
          score = distance(reference, rendition)
          lock.synchronize { distances << [id, rendition, score] }

          counter += 1
          print '.' if (counter % 100).zero?
        end
      end
    end

    # Thread#value joins the thread and re-raises whatever killed it; report
    # the crash but keep the results collected by the other workers.
    threads.each do |thread|
      begin
        thread.value
      rescue => e
        warn "Rendering thread crashed: #{e.message}"
      end
    end

    distances
  end

  # Non-blocking pop.
  #
  # @return [Object, nil] the next style id, or nil once the queue is empty
  def next_style(queue)
    queue.pop(true)
  rescue ThreadError
    nil
  end

  # Renders +data+ as a bibliography entry in the style +id+.
  #
  # @return [String] the rendition; '' if the style is missing, has no
  #   bibliography, or rendering raised (a warning is printed in that case)
  def render(renderer, id, data)
    style = STYLES[id]
    return '' unless style && style.bibliography

    begin
      renderer.render(cite(data), style.bibliography) || ''
    rescue => e
      warn "Failed to render #{id}: #{e.message}"
      ''
    ensure
      # Reset the renderer's history after every attempt, successful or not,
      # before the renderer is reused for the next style.
      renderer.state.history.discard
    end
  end

  # Prints the +limit+ best entries of the already sorted +distances+.
  def report(distances)
    puts "The #{limit} best matches are:"

    distances.take(limit).each do |id, rendition, d|
      print "#{id} "

      if d.zero?
        puts 'perfect match'
      else
        puts " (#{d}):\n#{rendition}"
      end
    end
  end

  # Sends the references to the parsing endpoint and pairs each input with
  # the corresponding element of the decoded JSON response.
  # NOTE(review): pairing is purely positional; this assumes the service
  # returns one item per reference, in request order -- confirm.
  #
  # @return [Array<Array>] [reference, parsed data] pairs
  # @raise [RuntimeError] if the response code is not 200
  def parse(references)
    references = Array(references)

    print "Trying to parse #{references.length} reference(s) on anystyle.io..."
    response = post '/parse/references.citeproc', references: references
    raise response.message unless response.code == 200

    puts 'done'
    references.zip JSON.parse(response.body)
  end

  # POSTs +options+ plus the access token as the request body. Uses the
  # non-destructive merge so the caller's hash is left untouched.
  def post(path, options = {})
    self.class.post path, body: options.merge(access_token: @token)
  end

  # Wraps parsed reference data in a citation item for the renderer, using
  # the placeholder id 'ID' when the data carries none.
  def cite(data)
    CiteProc::CitationItem.new(id: data['id'] || 'ID') do |item|
      item.data = CiteProc::Item.new data
    end
  end

  # Computes the Levenshtein distance of two strings using the
  # Wagner-Fischer algorithm. Only the previous and the current row of the
  # distance matrix are kept in memory.
  #
  # @return [Integer] minimal number of single-character edits
  def distance(source, target)
    s = source.to_s.chars
    t = target.to_s.chars

    return t.length if s.empty?
    return s.length if t.empty?

    # Row 0: distance from the empty prefix of source to each target prefix.
    previous = (0..t.length).to_a

    s.each_with_index do |source_char, i|
      # Column 0: deleting the first i + 1 characters of source.
      current = [i + 1]

      t.each_with_index do |target_char, j|
        current << if source_char == target_char
                     previous[j] # no operation
                   else
                     [
                       previous[j + 1], # deletion
                       current[j],      # insertion
                       previous[j]      # substitution
                     ].min + 1
                   end
      end

      previous = current
    end

    previous.last
  end
end
# Entry point: with command-line arguments, predict the style of all of them
# in one batch; otherwise read references line by line via gets until EOF,
# ignoring blank lines.
predictor = Predictor.new(API_KEY)

if ARGV.empty?
  while (line = gets)
    next if line.strip.empty?

    predictor.predict(line)
  end
else
  predictor.predict(ARGV)
end
Some notes for other Windows users, or myself if I ever need this again...
First install the required gems
gem install csl-styles
gem install citeproc-ruby
gem install httparty
gem install thread
Change the base_uri
in the Predictor
class to 'https://anystyle.io'
When facing this SSL error
connect': SSL_connect returned=1 errno=0 state=SSLv3 read server certificate B: certificate verify failed (OpenSSL::SSL::SSLError)
Make sure the environment variables SSL_CERT_DIR and SSL_CERT_FILE are set to a valid cert.pem file. (There's lots of StackExchange questions with more information on this).
Thanks for this API and the example script Sylvester!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add your API key at the top of the file; run as
./predict.rb
if the file is executable, or else ruby predict.rb
and enter the reference whose style you want to predict.