Skip to content

Instantly share code, notes, and snippets.

@hornc
Last active April 9, 2018 22:27
Show Gist options
  • Save hornc/230682dece335b998cb1b30498517251 to your computer and use it in GitHub Desktop.
Save hornc/230682dece335b998cb1b30498517251 to your computer and use it in GitHub Desktop.
Update removed authors appearing in search results from OL solr index.
#!/usr/bin/python
# coding: utf-8
from olclient.openlibrary import OpenLibrary
from datetime import datetime
import random
import time
# Script that repeatedly runs an Open Library author search and submits the
# matching author keys to the Solr index for updating, one batch at a time.
def author_search(ol, term, offset=0, limit=100):
    """Run an author search and return the decoded JSON response.

    ol     -- client object exposing ``base_url`` and a ``session`` whose
              ``get`` accepts a ``params`` mapping (assumed to be a
              requests-style session -- confirm against olclient).
    term   -- search text sent as the ``q`` query parameter.
    offset -- index of the first result to return.
    limit  -- maximum number of results to return.

    Returns whatever ``.json()`` on the response yields; the caller in this
    file reads the ``numFound`` and ``docs`` entries from it.
    """
    # Hand the query over as a mapping so the session URL-encodes it; pasting
    # ``term`` straight into the URL breaks on '&', '#', spaces or non-ASCII.
    query = {'q': term, 'offset': offset, 'limit': limit}
    response = ol.session.get(ol.base_url + "/search/authors.json", params=query)
    return response.json()
def solr_update(key_list):
    """Post a batch of keys to the ``/admin/solr`` endpoint for updating.

    The keys are joined one per line and sent as the ``keys`` form field.
    The request goes through the module-level ``ol`` client, which must be
    defined before this function is called.

    Returns the value returned by ``ol.session.post``.
    """
    form = {'keys': "\n".join(key_list)}
    send = ol.session.post
    return send(ol.base_url + '/admin/solr', data=form)
def _next_offset(same_results_count, batch_size, last_count):
    """Pick the search offset to use for the next batch.

    same_results_count -- number of consecutive polls whose result total
                          did not change.
    batch_size         -- number of results requested per poll.
    last_count         -- result total reported by the latest poll.

    When the result set is no larger than two batches the offset simply
    advances by one batch per unchanged poll.  Otherwise a random offset is
    drawn so that successive polls sample different parts of the result set.
    """
    start = same_results_count * batch_size
    if (2 * batch_size) >= last_count:
        return start
    if same_results_count > 0:
        end = last_count
    else:
        end = last_count - (2 * batch_size)
    # random.randint raises ValueError on an empty range, which happened when
    # several unchanged polls pushed ``start`` past ``last_count``; clamp it.
    return random.randint(min(start, end), end)


if __name__ == '__main__':
    ol = OpenLibrary()
    index_delay = 15  # minutes to wait between batches
    batch_size = 80
    term = '.com'
    offset = 0
    last_count = 0
    same_results_count = 0
    # Stop once the result total has stayed the same for 5 consecutive polls.
    while same_results_count < 5:
        results = author_search(ol, term, offset, batch_size)
        if results['numFound'] == last_count:
            same_results_count += 1
        else:
            same_results_count = 0  # count only consecutive same results
        last_count = results['numFound']
        # Single-argument print(...) gives the same output on Python 2 and
        # also parses on Python 3.
        print("%s: Found %d results" % (datetime.now(), last_count))
        author_keys = ['/authors/' + d['key'] for d in results['docs']]
        print(solr_update(author_keys))
        offset = _next_offset(same_results_count, batch_size, last_count)
        print("Offset: %d" % offset)
        time.sleep(index_delay * 60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment