Last active
September 21, 2015 19:09
-
-
Save jamilatta/8d2695878f6d2ce15f3f to your computer and use it in GitHub Desktop.
Script to collect the growth in the number of articles and issues from 2005 to 2016 (SciELO BR).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import requests | |
from lxml import html | |
# Endpoint queried for the journal identifier list of the "scl" collection.
ISSN_URL = "http://articlemeta.scielo.org/api/v1/journal/identifiers/?collection=scl"

# Statistics page template: main() substitutes every literal "ISSN" token and
# the "YEAR" token with str.replace before issuing the request.
# NOTE(review): the path contains a doubled slash ("//stat_biblio") - kept
# as-is; confirm whether the server depends on it.
STATBILIO_URL = "http://statbiblio.scielo.org//stat_biblio/index.php?state=15&lang=en&country=scl&issn=ISSN&CITED[]=ISSN&YNG[]=YEAR"

# range() excludes its end value, so this covers 2005 through 2015.
# NOTE(review): the gist description mentions 2016 - confirm the intended
# last year.
YEARS = range(2005, 2016)
def main():
    """Collect per-year issue and article counts for every listed ISSN.

    Downloads the journal identifier list from ISSN_URL, then, for each year
    in YEARS and each ISSN of each journal, requests the statistics page built
    from STATBILIO_URL and appends one ``year;issn;n_issue;n_article`` line to
    ``cutus.out`` (the file is truncated at start).

    A page is skipped when it contains a ``<div class="message">`` element
    (presumably the site's "no data" notice - confirm) or when the expected
    table cells are missing; the latter case is reported on stdout.

    Raises:
        requests.RequestException: if any HTTP request fails or times out, or
            if the ISSN list request returns an error status. The output file
            is closed before the exception propagates.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would hang this long-running crawl.
    timeout = 60

    print("Get a list of ISSN")
    response = requests.get(ISSN_URL, timeout=timeout)
    # Fail early with a clear HTTP error instead of a confusing JSON error.
    response.raise_for_status()
    # Each journal's 'code' entry is iterated below, so it is expected to be
    # a collection of ISSN strings.
    issns = [journal['code'] for journal in response.json()['objects']]

    # The context manager closes the file even when a request or parse fails.
    with open('cutus.out', 'w') as fp:
        for year in YEARS:
            year_text = str(year)
            for issn in issns:
                for anyissn in issn:
                    url = STATBILIO_URL.replace("ISSN", anyissn).replace("YEAR", year_text)
                    ret = requests.get(url, timeout=timeout)
                    tree = html.fromstring(ret.content)

                    if tree.xpath('//div[@class="message"]'):
                        continue

                    issue_cells = tree.xpath("//table//table//tr[4]/td[2]")
                    article_cells = tree.xpath("//table//table//tr[4]/td[3]")
                    if not issue_cells or not article_cells:
                        # Unexpected page layout: report and move on rather
                        # than aborting the whole crawl with an IndexError.
                        print("No statistics table for %s in %s" % (anyissn, year_text))
                        continue

                    n_issue = issue_cells[0].text_content()
                    n_article = article_cells[0].text_content()

                    fp.write(';'.join([year_text, anyissn, n_issue, n_article]) + '\n')
                    # Printing a single tuple keeps the progress output
                    # identical on Python 2 and valid on Python 3.
                    print((year, anyissn, n_issue, n_article))
# Run the collection only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment