Skip to content

Instantly share code, notes, and snippets.

@jaklinger
Created July 5, 2018 09:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jaklinger/d3c4940248c4a3dc52ce4d3d55708009 to your computer and use it in GitHub Desktop.
Save jaklinger/d3c4940248c4a3dc52ce4d3d55708009 to your computer and use it in GitHub Desktop.
Get papers from the Open Academic Graph by field of study (FOS)
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sql_text
from collections import defaultdict
# Load the database credentials from a local config file. The file is expected
# to hold a single colon-separated record with exactly five fields:
#     host:port:database:user:password
# Surrounding whitespace is stripped before splitting so that a trailing
# newline in the file does not end up inside the password (and the URI).
with open('/Users/jklinger/Nesta-AWS/AWS-RDS-config/open-academic-graph.config') as f:
    host, port, database, user, password = f.read().strip().split(':')

# NOTE(review): `port` and `database` are read from the config but not used:
# the URI carries no explicit port and the database name is hard-coded.
# Confirm this is intended before wiring them in.
database_uri = 'postgresql://{}:{}@{}/{}'.format(user, password, host, "microsoft_academic_graph")

# SQLAlchemy engine for the PostgreSQL database named in the URI.
con = create_engine(database_uri)
# Parameterised SQL statement. It selects up to 3000 `paper` values from the
# microsoft_academic_graph table where:
#   * the paper's 'fos' entry contains the bound :fos value (`@>` containment),
#   * the paper's 'lang' entry equals 'en',
#   * the paper has both an 'abstract' key and a 'keywords' key (`?` operator).
# :fos is a bind parameter supplied at execution time.
query = ''' select paper from microsoft_academic_graph
where ((paper -> 'fos'::text)) @> :fos
and ((paper ->> 'lang'::text)) = 'en'
and paper::jsonb ? 'abstract'
and paper::jsonb ? 'keywords'
limit 3000'''
# Fields of study to query, one query per entry.
fosses = [
    'Biology',
    'Medicine',
    'Geology',
    'Chemistry',
    'Psychology',
    'Philosophy',
    'Sociology',
    'Engineering',
    'Economics',
    'Computer Science',
    'Art',
    'Physics',
    'History',
    'Political Science',
    'Materials Science',
    'Mathematics',
    'Geography',
    'Business',
    # 'Environmental Science',  # currently disabled
]
# Run the query once per field of study, grouping the returned papers by field
# and recording every paper id so that overlap between fields can be measured.
ids = []  # every returned paper id, duplicates included
papers = defaultdict(list)  # field of study -> list of paper documents

# Use one explicit connection for the whole run so it is released when the
# block exits. Passing the parameters as a dict (rather than as keyword
# arguments to Engine.execute) also works on SQLAlchemy 2.x.
with con.connect() as connection:
    for fos in fosses:
        print(fos)
        # :fos is bound as a one-element JSON array, e.g. '["Biology"]'.
        exec_result = connection.execute(
            sql_text(query), {'fos': '["{}"]'.format(fos)}
        )
        # Each row is a 1-tuple holding the selected `paper` value.
        for paper, in exec_result.fetchall():
            papers[fos].append(paper)
            ids.append(paper['id'])

# Report unique vs. total id counts; a gap means some papers were returned for
# more than one field. (A bare expression here has no effect when the file is
# run as a script, so print it explicitly.)
print(len(set(ids)), len(ids))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment