Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save joel-bernstein/9d03ebaefb3114a55c0c719a31f6e821 to your computer and use it in GitHub Desktop.
Save joel-bernstein/9d03ebaefb3114a55c0c719a31f6e821 to your computer and use it in GitHub Desktop.
Block loader
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import java.util.Random;
import java.util.Date;
import java.util.UUID;
import java.io.*;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.UniformIntegerDistribution;
import org.apache.commons.math3.distribution.EnumeratedIntegerDistribution;
import org.apache.commons.math3.distribution.ZipfDistribution;
public class BlockLoader {
public static void main(String args[]) throws Exception {
BufferedReader in = new BufferedReader(new FileReader("words.txt"));
String[] words = new String[3001];
for(int w=0; w<words.length; w++) {
words[w] = in.readLine();
}
in.close();
int num = Integer.parseInt(args[0]);
String zkUrl = args[1];
String collection = args[2];
System.out.println("Connecting to "+zkUrl);
SolrClient client = null;
if(zkUrl.startsWith("http")) {
HttpSolrClient.Builder builder = new HttpSolrClient.Builder();
client = builder.withBaseSolrUrl(zkUrl+"/"+collection).build();
} else {
CloudSolrClient.Builder builder = new CloudSolrClient.Builder();
CloudSolrClient c = builder.withZkHost(zkUrl).build();
c.setDefaultCollection(collection);
client = c;
}
UpdateRequest request = new UpdateRequest();
System.out.println("Connected");
int i=0;
String[] dates = {"2012-01-20T17:33:18Z",
"2012-02-20T17:33:18Z",
"2012-03-20T17:33:18Z",
"2012-04-20T17:33:18Z",
"2012-05-20T17:33:18Z",
"2012-06-20T17:33:18Z",
"2012-07-20T17:33:18Z",
"2012-08-20T17:33:18Z",
"2012-09-20T17:33:18Z",
"2012-10-20T17:33:18Z",
"2012-11-20T17:33:18Z",
"2012-12-20T17:33:18Z",
"2013-01-20T17:33:18Z",
"2013-02-20T17:33:18Z",
"2013-03-20T17:33:18Z",
"2013-04-20T17:33:18Z",
"2013-05-20T17:33:18Z",
"2013-06-20T17:33:18Z",
"2013-07-20T17:33:18Z",
"2013-08-20T17:33:18Z",
"2013-09-20T17:33:18Z",
"2013-10-20T17:33:18Z",
};
Random rand = new Random();
NormalDistribution normal = new NormalDistribution(40000, 2000);
NormalDistribution noise = new NormalDistribution(100, 15);
NormalDistribution noise1 = new NormalDistribution(0, 15);
NormalDistribution noise2 = new NormalDistribution(0, 7);
NormalDistribution noise3 = new NormalDistribution(0, 2);
NormalDistribution loadD = new NormalDistribution(500, 100);
UniformIntegerDistribution serviceLevel = new UniformIntegerDistribution(1, 4);
NormalDistribution componentA = new NormalDistribution(500, 50);
NormalDistribution componentB = new NormalDistribution(500, 25);
int[] prods = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
double[] probs = {1,2,3,4,5,6,7,25,9,10,37,12,13,14,15};
EnumeratedIntegerDistribution enumd = new EnumeratedIntegerDistribution(prods, probs);
double lat = 40.7128;
double lon = -74.0060;
UniformIntegerDistribution hour = new UniformIntegerDistribution(1,17);
UniformIntegerDistribution minute = new UniformIntegerDistribution(1,56);
UniformIntegerDistribution second = new UniformIntegerDistribution(1,56);
UniformIntegerDistribution prod = new UniformIntegerDistribution(1,500000);
ZipfDistribution zipF = new ZipfDistribution(3000, 1);
for(i=0; i<num; i++) {
//System.out.println("Adding doc:"+i);
UUID id = UUID.randomUUID();
StringBuilder paragraph = new StringBuilder();
for(int w=0; w<70; w++) {
int word = zipF.sample();
paragraph.append(word);
paragraph.append(" ");
}
double moveLat = lat+componentA.sample();
double moveLong = lon+componentB.sample();
String ho = Integer.toString(hour.sample());
String mi = Integer.toString(minute.sample());
String se = Integer.toString(second.sample());
String s = rand.nextInt(5000)+"helloworld";
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", id.toString());
doc.addField("num_s", i);
doc.addField("test_s", s);
doc.addField("id_i", (i+5000000));
doc.addField("id_ii", "2");
doc.addField("id_ii", "3");
int t = rand.nextInt(20);
doc.addField("abstract_t", paragraph.toString());
int year = rand.nextInt(50);
int month = rand.nextInt(12);
int day = rand.nextInt(30);
double load = loadD.sample();
double d = normal.sample();
double d1 = serviceLevel.sample();
double var1 = normal.sample();
double var2 = (var1+100000)+noise.sample();
float f = rand.nextFloat();
double cA = componentA.sample();
double cB = componentB.sample();
int factor = 0;
int monthIndex = rand.nextInt(11);
if(monthIndex % 2 == 0) {
factor=10;
} else {
factor=-10;
}
int dayIndex = rand.nextInt(21);
double stockPrice = noise2.sample();
double baseA = 15;
double baseB = 12;
doc.addField("stock_s", "stock_"+Integer.toString(year));
doc.addField("a.stock_s", Integer.toString(year));
doc.addField("price_i", Integer.toString((year*10)+factor));
doc.addField("day_i", Integer.toString(day));
doc.addField("load_d", Double.toString(load));
doc.addField("response_d", Double.toString((d/50D)+(load*.11)+noise1.sample()));
doc.addField("eresponse_d", Double.toString((d/2000D)+(normal.cumulativeProbability(d)*100)+noise2.sample()));
doc.addField("filesize_d", Double.toString(d));
doc.addField("filesize_td", Double.toString(d));
doc.addField("service_d", Double.toString(d1));
doc.addField("service_s", "level"+new Double(d1).intValue());
doc.addField("var1_d", Double.toString(var1));
doc.addField("var2_d", Double.toString(var2));
doc.addField("price_f", Float.toString(f));
doc.addField("tdate_dt", dates[dayIndex].replace("17", ho).replace("33", mi).replace("18", se));
//doc.addField("loc_p", moveLat+","+moveLong);
doc.addField("stock_priceA_d", baseA+stockPrice);
doc.addField("stock_priceB_d", baseB+stockPrice+((double)dayIndex));
doc.addField("componentA_d", cA);
doc.addField("componentB_d", cB);
doc.addField("prod_s", "product"+prod.sample());
for(int l=0; l<5; l++) {
String ps = "product"+prod.sample();
doc.addField("prod_ss",ps);
}
doc.addField("content_type_s", "parentDocument");
doc.addField("group_s", doc.getFieldValue("id").toString());
SolrInputDocument child = doc.deepCopy();
child.setField("content_type_s", "childDocument");
for(int c=0; c<50; c++) {
child.setField("id",UUID.randomUUID().toString());
doc.addChildDocument(child);
child = child.deepCopy();
}
request.add(doc);
//System.out.println("Doc Added");
if(i % 1000 == 0) {
System.out.println("Sending batch...");
request.process(client);
//client.commit();
System.out.println("Batch sent");
request = new UpdateRequest();
}
}
System.out.println("Processing last batch");
request.process(client);
client.commit();
client.close();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment