Skip to content

Instantly share code, notes, and snippets.

@jsicot
Created October 16, 2018 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsicot/473000d4d506e18f7a502c56b6d117d7 to your computer and use it in GitHub Desktop.
Save jsicot/473000d4d506e18f7a502c56b6d117d7 to your computer and use it in GitHub Desktop.
Retrieve author PPN
import csv
import urllib.request,urllib.parse,json
import os, ssl
# SSL exception: unless PYTHONHTTPSVERIFY is set to a non-empty value, swap
# the ssl module's default HTTPS context factory for the unverified one.
# NOTE(review): this is a process-wide override that relies on private ssl
# attributes; HTTPS requests made through the default context will presumably
# skip certificate validation afterwards -- confirm this is still wanted.
if not os.environ.get('PYTHONHTTPSVERIFY', ''):
    # Only patch when this Python build actually exposes the private factory.
    if getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context
def getPpn(name, firstname, birthdate):
    """Look up a person's PPN in IdRef from surname, first name and birth date.

    Sends a phrase query on the ``persname_t`` field of the IdRef Solr
    endpoint (restricted to ``recordtype_z:a``, sorted by descending score)
    and returns the ``ppn_z`` value of the first document. When nothing is
    found and a birth date was supplied, the lookup is retried once with an
    empty birth date.

    Args:
        name: Surname of the person.
        firstname: First name of the person.
        birthdate: Birth date as text; pass ``""`` to search on the names only.

    Returns:
        The ``ppn_z`` of the top-ranked match, or ``""`` when no record is
        found.

    Raises:
        urllib.error.URLError: If the request to the service fails.
        KeyError: If the JSON answer lacks the expected ``response`` fields.
    """
    # Percent-encode every component, the birth date included: an unescaped
    # space or special character would otherwise yield an invalid URL.
    query = "%20".join(
        urllib.parse.quote(part) for part in (name, firstname, birthdate)
    )
    svc = (
        "https://www.idref.fr/Sru/Solr?q=persname_t:%22" + query
        + "%22%20AND%20recordtype_z:a&sort=score%20desc&version=2.2&start=0"
        "&rows=30&indent=on&fl=id,ppn_z,recordtype_z,affcourt_z&wt=json"
    )

    # The context manager closes the HTTP response even if reading or
    # decoding raises, so no connection is left open.
    with urllib.request.urlopen(svc) as response:
        payload = json.loads(response.read().decode('utf-8'))

    result = payload['response']
    if result['numFound'] > 0:
        # Results are sorted by score, so the first document is the best hit.
        return result['docs'][0]['ppn_z']
    if birthdate != "":
        # No hit with the birth date: fall back to a names-only search.
        return getPpn(name, firstname, "")
    return ""
def constructOutput(input_file):
    """Read the input CSV and attach an IdRef PPN to every author row.

    Each non-blank row is read positionally: column 0 is the user id,
    column 1 the surname, column 2 the first name and column 3 the birth
    date. Columns 1-3 are passed to ``getPpn`` to resolve the PPN.

    Args:
        input_file: Path of the comma-separated input file (read as UTF-8).

    Returns:
        A list of dicts with the keys ``user_id``, ``firstname``, ``surname``
        and ``ppn``, in input order.

    Raises:
        IndexError: If a non-blank row has fewer than four columns.
    """
    output = []
    # newline='' lets the csv module handle line endings and embedded
    # newlines itself; UTF-8 matches the encoding used for the output file.
    with open(input_file, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; there is no author
                # to look up, so skip it instead of failing on row[1].
                continue
            ppn = getPpn(row[1], row[2], row[3])
            output.append({
                'user_id': row[0],
                'firstname': row[2],
                'surname': row[1],
                'ppn': ppn,
            })
    return output
def writeCsv(file, list_of_dicts):
    """Write a list of dictionaries to a UTF-8 CSV file with a header row.

    The column names and their order are taken from the keys of the first
    dictionary. An empty list produces an empty file.

    Args:
        file: Path of the CSV file to create or overwrite.
        list_of_dicts: Sequence of row dictionaries sharing the same keys.

    Raises:
        ValueError: If a row contains a key that is absent from the first
            dictionary (raised by ``csv.DictWriter``).
    """
    # newline='' stops the text layer from translating the writer's own
    # '\r\n' terminator, which would give '\r\r\n' row endings on Windows.
    with open(file, 'w', encoding='utf-8', newline='') as output_file:
        if not list_of_dicts:
            # No rows means no header can be derived; leave the file empty
            # rather than failing on list_of_dicts[0].
            return
        dict_writer = csv.DictWriter(
            output_file, fieldnames=list(list_of_dicts[0])
        )
        dict_writer.writeheader()
        dict_writer.writerows(list_of_dicts)
# Script entry: resolve a PPN for every author listed in input.csv ...
toCSV = constructOutput(input_file='input.csv')
# ... then dump the enriched rows to out.csv.
writeCsv(file='out.csv', list_of_dicts=toCSV)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment