Skip to content

Instantly share code, notes, and snippets.

Created March 10, 2015 23:22
Show Gist options
  • Save aschweer/e4a4d168ca9ad094dca4 to your computer and use it in GitHub Desktop.
Save aschweer/e4a4d168ca9ad094dca4 to your computer and use it in GitHub Desktop.
Re-index DSpace solr documents
package org.dspace.util;
import org.apache.commons.cli.*;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.dspace.core.ConfigurationManager;
import org.joda.time.*;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import java.io.IOException;
import java.net.ConnectException;
import java.util.*;
* @author Andrea Schweer for the LCoNZ Institutional Research Repositories
public class ReindexSolr
public static final String HELP_OPTION = "h";
public static final String INDEX_NAME_OPTION = "i";
public static void main(String[] args) throws ParseException {
CommandLineParser parser = new PosixParser();
Options options = new Options();
options.addOption(HELP_OPTION, "help", false, "Get help on options for this command.");
options.addOption(INDEX_NAME_OPTION, "index-name", true, "The names of the indexes to process. At least one is required. Available indexes are: authority, statistics.");
CommandLine line = parser.parse(options, args);
if (line.hasOption(HELP_OPTION))
printHelpAndExit(options, 0);
if (!line.hasOption(INDEX_NAME_OPTION))
System.out.println("This command requires the index-name option but none was present.");
printHelpAndExit(options, 1);
String[] indexNames = line.getOptionValues(INDEX_NAME_OPTION);
for (String indexName : indexNames)
String solrURL = findSolrUrl(indexName);
if (StringUtils.isBlank(solrURL))
System.out.println("Solr index name " + indexName + " not recognised or no URL could be found for the index.");
printHelpAndExit(options, 1);
String timeField = "statistics".equals(indexName) ? "time" : "last_modified_date";
try {
DateTime before =;
reindex(solrURL, timeField);
DateTime after =;
Period period = new Period(before, after);
PeriodFormatter formatter = new PeriodFormatterBuilder().appendDays().appendSuffix(" days ").appendHours().appendSuffix(" hours ")
.appendMinutes().appendSuffix(" minutes ").appendSeconds().appendSuffix(" seconds")
System.out.println("Processed index " + indexName + " in " + formatter.print(period));
catch (SolrServerException | ReindexSolrException | IOException e) {
if (e.getCause() != null && e.getCause() instanceof ConnectException)
System.err.println("Solr needs to be running for this command to succeed but doesn't appear to be reachable: " + e.getMessage());
System.err.println("An exception was caught trying to re-index " + indexName + ": " + e.getMessage());
catch (ParseException e)
System.err.println("Cannot read command options");
printHelpAndExit(options, 1);
private static String findSolrUrl(String indexName)
if ("authority".equals(indexName))
return ConfigurationManager.getProperty("solr.authority.server");
if ("statistics".equals(indexName))
// TODO allow year cores to be reindexed too?
return ConfigurationManager.getProperty("solr-statistics", "server");
return null;
private static void reindex(String solrURL, String timeField) throws SolrServerException, ReindexSolrException, IOException {
HttpSolrServer solr = new HttpSolrServer(solrURL);
Date earliestDate = getEarliestDocDate(timeField, solr);
if (earliestDate == null) {
throw new ReindexSolrException("Cannot find earliest document in index");
int monthsSinceEarliest = calculateMonthsSince(earliestDate);
String queryString = timeField + ":[NOW/MONTH TO NOW]";
SolrQuery query = new SolrQuery(queryString);
for (int monthsAgo = 1; monthsAgo <= monthsSinceEarliest + 1; monthsAgo++)
SolrDocumentList results = solr.query(query).getResults();
System.out.println("This month has " + results.getNumFound() + " documents");
Iterator<SolrDocument> iterator = results.iterator();
List<SolrInputDocument> addBack = new ArrayList<>();
while (iterator.hasNext())
SolrDocument result =;
if (!addBack.isEmpty())
solr.commit(true, true);
System.out.println("Processed " + addBack.size() + " documents");
// TODO do the same thing again if this month has more than 250000 docs
// prepare query for next round
queryString = timeField + ":[" + "NOW/MONTH-" + monthsAgo + "MONTHS"
+ " TO "
+ "NOW/MONTH" + (monthsAgo > 1 ? "-" + (monthsAgo - 1) + "MONTHS" : "") + "]";
private static int calculateMonthsSince(Date from)
DateTime now ="UTC"))).withTimeAtStartOfDay();
DateTime actualFrom = new DateTime(from).withTimeAtStartOfDay();
return Months.monthsBetween(actualFrom, now).getMonths();
private static Date getEarliestDocDate(String timeField, HttpSolrServer solr) throws SolrServerException
SolrQuery query = new SolrQuery("*:*");
query.setSort(timeField, SolrQuery.ORDER.asc);
QueryResponse response = solr.query(query);
SolrDocumentList results = response.getResults();
if (results.size() != 1)
return null;
return (Date) results.get(0).getFieldValue(timeField);
private static void printHelpAndExit(Options options, int exitCode)
HelpFormatter myhelp = new HelpFormatter();
myhelp.printHelp(ReindexSolr.class.getSimpleName() + "\n", options);
private static class ReindexSolrException extends Throwable
public ReindexSolrException(String message)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment