Skip to content

Instantly share code, notes, and snippets.

@K-Wu
Created March 13, 2019 04:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save K-Wu/e8fe24311093574ccd3d88cc5d70cb54 to your computer and use it in GitHub Desktop.
Save K-Wu/e8fe24311093574ccd3d88cc5d70cb54 to your computer and use it in GitHub Desktop.
A script that obtains the journals and conferences of every sub-field of computer science by leveraging the Microsoft Academic Knowledge API.
# Reference 1: https://dev.labs.cognitive.microsoft.com/docs/services/56332331778daf02acc0a50b/operations/565d9001ca73072048922d97
# Reference 2: https://docs.microsoft.com/en-us/azure/cognitive-services/academic-knowledge/paperentityattributes
# Subscription keys sent in the 'Ocp-Apim-Subscription-Key' request header.
key1 = 'put_your_key_here'
key2 = 'put_your_key_here'
# _obtain_all compares `key` with `key1` to decide which of the two keys
# above is sent with its requests.
key = 'put_your_key_here'

# Field-of-study names that main() iterates over; each one is substituted
# into the F.FN=='...' clause of the query expression.
CS_CATEGORIES = [
    "artificial intelligence",
    "computer hardware",
    "computer vision",
    "computer network",
    "real-time computing",
    "distributed computing",
    "pattern recognition",
    "data mining",
    "machine learning",
    "embedded system",
    "knowledge management",
    "multimedia",
    "library science",
    "simulation",
    "algorithm",
    "database",
    "world wide web",
    "computer security",
    "speech recognition",
    "telecommunications",
    "natural language processing",
    "theoretical computer science",
    "information retrieval",
    "programming language",
    "computer architecture",
    "software engineering",
    "operating system",
    "parallel computing",
    "human–computer interaction",
    "computer graphics",
    "computational science",
    "computer engineering",
    "data science",
    "internet privacy",
]
import requests
import json
import utils
import time
def _obtain_all(category, attributes):
    """Collect every histogram entry of ``attributes`` for one field of study.

    Pages through the ``calchistogram`` endpoint 1000 entries at a time,
    restricted to papers whose field name equals ``category`` and whose
    year is greater than 2010.

    Args:
        category: Field-of-study name substituted into ``F.FN=='...'``.
        attributes: Attribute the histogram is computed over (the callers
            in this file pass ``"C.CN"`` or ``"J.JN"``).

    Returns:
        A list holding the items of ``histograms[0]['histogram']`` from
        every page, in the order the pages were received.
    """
    # The choice does not depend on anything that changes inside the loop,
    # so it is made once.
    # NOTE(review): `key` is never reassigned by this function, so the same
    # key is used for every request; if alternating between key1 and key2
    # was intended, that is not what happens -- confirm.
    if key == key1:
        headers = {'Ocp-Apim-Subscription-Key': key2}
    else:
        headers = {'Ocp-Apim-Subscription-Key': key1}

    results = []
    offset = 0
    # Placeholder so the first request is always sent; replaced by the
    # 'distinct_values' count reported in the first successful response.
    total_entries = 1
    while offset < total_entries:
        # Pause before every request (presumably to respect the API rate
        # limit -- confirm). Checking the loop condition first avoids a
        # pointless 7 s wait once the last page has been fetched.
        time.sleep(7)
        url = "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='{category}'),Y>2010)&attributes={attributes}&count=1000&offset={offset}&timeout=3600000".format(
            category=category, attributes=attributes, offset=offset)
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print("WARNING: request failed in category: {category} attributes: {attributes}".format(
                category=category, attributes=attributes))
            print(str(response.content))
            # NOTE(review): the same offset is retried without any limit,
            # so a permanent failure (e.g. an invalid key) loops forever.
            continue
        histogram = json.loads(response.content)['histograms'][0]
        if offset == 0:
            total_entries = histogram['distinct_values']
        results.extend(histogram['histogram'])
        offset += 1000  # must stay equal to count=1000 in the URL
    return results
def obtain_all_conferences(category):
    """Return the histogram entries over the ``C.CN`` attribute for ``category``.

    Thin wrapper that forwards ``category`` to ``_obtain_all``.
    """
    return _obtain_all(category, "C.CN")
def obtain_all_journals(category):
    """Return the histogram entries over the ``J.JN`` attribute for ``category``.

    Thin wrapper that forwards ``category`` to ``_obtain_all``.
    """
    return _obtain_all(category, "J.JN")
def main():
    """Fetch journals and conferences for every category and save them.

    For each name in ``CS_CATEGORIES`` the journal list is fetched first,
    then the conference list. Both are stored in one dict under the keys
    ``'journals'`` and ``'conferences'`` and passed to ``utils.save_obj``
    together with a file name built from the category (spaces replaced by
    underscores, ``.pkl`` suffix).
    """
    for category in CS_CATEGORIES:
        journals = obtain_all_journals(category)
        conferences = obtain_all_conferences(category)
        file_name = "{category}.pkl".format(category=category.replace(" ", "_"))
        utils.save_obj({'journals': journals, 'conferences': conferences}, file_name)
if __name__ == "__main__":
    def test():
        """Scratch helper that sends one hand-written query using ``key1``.

        It is defined here but not called in this part of the file. The
        response is fetched and then discarded.
        """
        # Earlier experiment, kept for reference:
        # url="https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(AA.AuN=='jaime teevan'),Y>2012)&attributes=Y,F.FN&count=4"
        candidate_urls = (
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=Y>2017&attributes=F.FN&count=262942&timeout=3600000",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='artificial intelligence'),Y>2010)&attributes=J.JN&count=262942&timeout=3600000",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/interpret?query= journal by Yong Li after 2012",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='artificial intelligence'),Y>2010)&attributes=J.JN&count=1000&timeout=3600000",
            "https://api.labs.cognitive.microsoft.com/academic/v1.0/calchistogram?expr=And(Composite(F.FN=='computer architecture'),Y>2010)&attributes=C.CN&count=1000&timeout=3600000",
        )
        # Only the last query is ever sent; the others are leftovers from
        # earlier experiments.
        url = candidate_urls[-1]
        requests.get(url, headers={'Ocp-Apim-Subscription-Key': key1})

    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment