Last active
April 9, 2018 22:27
-
-
Save hornc/230682dece335b998cb1b30498517251 to your computer and use it in GitHub Desktop.
Update removed authors appearing in search results from OL solr index.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# coding: utf-8
from datetime import datetime
import random
import time

from olclient.openlibrary import OpenLibrary
# Script that repeatedly runs an OL author search and incrementally
# resubmits the matching author keys for a Solr index update.
def author_search(ol, term, offset=0, limit=100):
    """Run an author search and return the decoded JSON response.

    ol     -- client object exposing ``base_url`` and a ``session`` whose
              ``get`` method accepts a ``params`` mapping (presumably a
              requests session, as created by olclient -- confirm).
    term   -- search query string.
    offset -- index of the first result to return.
    limit  -- maximum number of results to return.

    The query values are handed over as ``params`` instead of being
    interpolated into the URL, so that the HTTP layer URL-encodes them.
    A term containing a space, ``&``, ``#`` or ``%`` would otherwise
    corrupt the query string.
    """
    query = {
        'q': term,
        # int() mirrors the integer formatting of the previous "%d" template.
        'offset': int(offset),
        'limit': int(limit),
    }
    response = ol.session.get(ol.base_url + "/search/authors.json", params=query)
    return response.json()
def solr_update(key_list, client=None):
    """Send keys to the ol /admin/solr endpoint for updating.

    key_list -- iterable of key strings; they are joined with newlines and
                posted as the ``keys`` form field.
    client   -- optional client object exposing ``base_url`` and a
                ``session`` with a ``post`` method. When omitted, the
                module-level ``ol`` created by the script entry point is
                used, so existing ``solr_update(keys)`` calls keep working.

    Returns whatever ``session.post`` returns.
    """
    if client is None:
        # Fall back to the global client; it only exists when the file is
        # run as a script, so importers should pass ``client`` explicitly.
        client = ol
    keys = "\n".join(key_list)
    return client.session.post(client.base_url + '/admin/solr', data={'keys': keys})
def next_offset(last_count, same_results_count, batch_size):
    """Pick the search offset to use for the next batch.

    last_count         -- number of results reported by the latest search.
    same_results_count -- how many consecutive searches reported the same
                          result count.
    batch_size         -- number of results requested per search.

    When there are more than two batches of results, a random offset is
    chosen; otherwise the offset simply advances by one batch for every
    repeated result count.
    """
    start = same_results_count * batch_size
    if (2 * batch_size) < last_count:
        if same_results_count > 0:
            end = last_count
        else:
            end = last_count - (2 * batch_size)
        # random.randint raises ValueError when its lower bound exceeds the
        # upper one, which happens once start grows past last_count (e.g.
        # 200 results, 3 repeats, batches of 80), so clamp the lower bound.
        return random.randint(min(start, end), end)
    return start


if __name__ == '__main__':
    ol = OpenLibrary()
    index_delay = 15  # minutes
    batch_size = 80
    term = '.com'
    offset = 0
    last_count = 0
    same_results_count = 0
    # Stop once five consecutive searches report an unchanged result count.
    while same_results_count < 5:
        results = author_search(ol, term, offset, batch_size)
        if results['numFound'] == last_count:
            same_results_count += 1
        else:
            same_results_count = 0  # count only consecutive same results
        last_count = results['numFound']
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("%s: Found %d results" % (datetime.now(), last_count))
        author_keys = ['/authors/' + d['key'] for d in results['docs']]
        print(solr_update(author_keys))
        offset = next_offset(last_count, same_results_count, batch_size)
        print("Offset: %d" % offset)
        # Wait before the next search/update round.
        time.sleep(index_delay * 60)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment