Skip to content

Instantly share code, notes, and snippets.

@hanjianwei
Last active August 29, 2015 14:01
Show Gist options
  • Save hanjianwei/15f1215e984e21b3ba3d to your computer and use it in GitHub Desktop.
Save hanjianwei/15f1215e984e21b3ba3d to your computer and use it in GitHub Desktop.
Retrieve the DOI list from a table-of-contents page (ACM Digital Library)
#!/usr/bin/env phantomjs
fs = require 'fs'
webPage = require 'webpage'
system = require 'system'
# Load `url` in a fresh PhantomJS page, append the page title and the inner
# HTML of every link whose title attribute is 'DOI' to output.txt, then move
# on to the URLs left in `remains`.
#
# url     - address of the page to load.
# remains - array of URLs still to be processed after this one; once it is
#           empty the script ends through phantom.exit().
#
# When the response for `url` itself reports a redirect, the redirect target
# is loaded instead and `remains` is handed over unchanged.
printDoi = (url, remains) ->
  page = webPage.create()
  redirectURL = null

  console.log "Loading " + url

  # console.log calls made inside page.evaluate are delivered to this
  # handler; they are appended to the output file.
  page.onConsoleMessage = (msg) ->
    fs.write 'output.txt', msg, 'a'

  # Remember the redirect target announced for the URL that was requested.
  page.onResourceReceived = (resource) ->
    if url == resource.url and resource.redirectURL
      redirectURL = resource.redirectURL

  page.open url, (status) ->
    if redirectURL
      # This page is finished with; release it before following the redirect.
      page.close()
      printDoi redirectURL, remains
      return

    if status == 'success'
      page.evaluate ->
        console.log '\n' + document.title + '\n'
        links = document.getElementsByTagName 'a'
        console.log link.innerHTML + '\n' for link in links when link.getAttribute('title') == 'DOI'
    else
      # Report the failure and keep going so one bad URL does not stall the
      # remaining ones or leave the script running forever.
      console.log "Failed to load " + url

    page.close()

    if remains.length == 0
      phantom.exit()
    else
      printDoi remains[0], remains[1..]
# Command-line entry point.
#
# The arguments are joined into a case-insensitive pattern that is matched
# against the conference names (the keys of ./acm.json). Matching names are
# listed, then the user is prompted:
#   'y' / 'yes'   - fetch the DOIs of every matching conference;
#   other text    - treat the answer itself as the URL to fetch;
#   empty answer  - exit without fetching anything.
if system.args.length == 1
  console.log 'Usage: acm.coffee <conditions>'
  phantom.exit()
else
  # Everything else lives under `else` so that nothing further runs after the
  # usage message, in case phantom.exit() does not halt the script at once.
  pattern = new RegExp '.*' + system.args[1..].join('.*') + '.*', 'i'
  acm = JSON.parse fs.read('./acm.json')

  # Print each matching conference name and collect its URL.
  urls = for conference, url of acm when conference.match(pattern)
    console.log conference
    url

  system.stdout.writeLine('Fetch doi? ')
  confirm = system.stdin.readLine()

  if confirm == 'y' or confirm == 'yes'
    if urls.length == 0
      # Without this guard printDoi would be asked to open `undefined`.
      console.log 'No conference matches the given conditions.'
      phantom.exit()
    else
      printDoi urls[0], urls[1..]
  else if confirm
    printDoi confirm, []
  else
    phantom.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment