Created
June 9, 2020 10:44
-
-
Save joel-bernstein/9d03ebaefb3114a55c0c719a31f6e821 to your computer and use it in GitHub Desktop.
Block loader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.solr.client.solrj.impl.CloudSolrClient; | |
import org.apache.solr.client.solrj.impl.HttpSolrClient; | |
import org.apache.solr.client.solrj.SolrClient; | |
import org.apache.solr.client.solrj.request.UpdateRequest; | |
import org.apache.solr.common.SolrInputDocument; | |
import java.util.Random; | |
import java.util.Date; | |
import java.util.UUID; | |
import java.io.*; | |
import org.apache.commons.math3.distribution.NormalDistribution; | |
import org.apache.commons.math3.distribution.UniformIntegerDistribution; | |
import org.apache.commons.math3.distribution.EnumeratedIntegerDistribution; | |
import org.apache.commons.math3.distribution.ZipfDistribution; | |
public class BlockLoader { | |
public static void main(String args[]) throws Exception { | |
BufferedReader in = new BufferedReader(new FileReader("words.txt")); | |
String[] words = new String[3001]; | |
for(int w=0; w<words.length; w++) { | |
words[w] = in.readLine(); | |
} | |
in.close(); | |
int num = Integer.parseInt(args[0]); | |
String zkUrl = args[1]; | |
String collection = args[2]; | |
System.out.println("Connecting to "+zkUrl); | |
SolrClient client = null; | |
if(zkUrl.startsWith("http")) { | |
HttpSolrClient.Builder builder = new HttpSolrClient.Builder(); | |
client = builder.withBaseSolrUrl(zkUrl+"/"+collection).build(); | |
} else { | |
CloudSolrClient.Builder builder = new CloudSolrClient.Builder(); | |
CloudSolrClient c = builder.withZkHost(zkUrl).build(); | |
c.setDefaultCollection(collection); | |
client = c; | |
} | |
UpdateRequest request = new UpdateRequest(); | |
System.out.println("Connected"); | |
int i=0; | |
String[] dates = {"2012-01-20T17:33:18Z", | |
"2012-02-20T17:33:18Z", | |
"2012-03-20T17:33:18Z", | |
"2012-04-20T17:33:18Z", | |
"2012-05-20T17:33:18Z", | |
"2012-06-20T17:33:18Z", | |
"2012-07-20T17:33:18Z", | |
"2012-08-20T17:33:18Z", | |
"2012-09-20T17:33:18Z", | |
"2012-10-20T17:33:18Z", | |
"2012-11-20T17:33:18Z", | |
"2012-12-20T17:33:18Z", | |
"2013-01-20T17:33:18Z", | |
"2013-02-20T17:33:18Z", | |
"2013-03-20T17:33:18Z", | |
"2013-04-20T17:33:18Z", | |
"2013-05-20T17:33:18Z", | |
"2013-06-20T17:33:18Z", | |
"2013-07-20T17:33:18Z", | |
"2013-08-20T17:33:18Z", | |
"2013-09-20T17:33:18Z", | |
"2013-10-20T17:33:18Z", | |
}; | |
Random rand = new Random(); | |
NormalDistribution normal = new NormalDistribution(40000, 2000); | |
NormalDistribution noise = new NormalDistribution(100, 15); | |
NormalDistribution noise1 = new NormalDistribution(0, 15); | |
NormalDistribution noise2 = new NormalDistribution(0, 7); | |
NormalDistribution noise3 = new NormalDistribution(0, 2); | |
NormalDistribution loadD = new NormalDistribution(500, 100); | |
UniformIntegerDistribution serviceLevel = new UniformIntegerDistribution(1, 4); | |
NormalDistribution componentA = new NormalDistribution(500, 50); | |
NormalDistribution componentB = new NormalDistribution(500, 25); | |
int[] prods = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; | |
double[] probs = {1,2,3,4,5,6,7,25,9,10,37,12,13,14,15}; | |
EnumeratedIntegerDistribution enumd = new EnumeratedIntegerDistribution(prods, probs); | |
double lat = 40.7128; | |
double lon = -74.0060; | |
UniformIntegerDistribution hour = new UniformIntegerDistribution(1,17); | |
UniformIntegerDistribution minute = new UniformIntegerDistribution(1,56); | |
UniformIntegerDistribution second = new UniformIntegerDistribution(1,56); | |
UniformIntegerDistribution prod = new UniformIntegerDistribution(1,500000); | |
ZipfDistribution zipF = new ZipfDistribution(3000, 1); | |
for(i=0; i<num; i++) { | |
//System.out.println("Adding doc:"+i); | |
UUID id = UUID.randomUUID(); | |
StringBuilder paragraph = new StringBuilder(); | |
for(int w=0; w<70; w++) { | |
int word = zipF.sample(); | |
paragraph.append(word); | |
paragraph.append(" "); | |
} | |
double moveLat = lat+componentA.sample(); | |
double moveLong = lon+componentB.sample(); | |
String ho = Integer.toString(hour.sample()); | |
String mi = Integer.toString(minute.sample()); | |
String se = Integer.toString(second.sample()); | |
String s = rand.nextInt(5000)+"helloworld"; | |
SolrInputDocument doc = new SolrInputDocument(); | |
doc.addField("id", id.toString()); | |
doc.addField("num_s", i); | |
doc.addField("test_s", s); | |
doc.addField("id_i", (i+5000000)); | |
doc.addField("id_ii", "2"); | |
doc.addField("id_ii", "3"); | |
int t = rand.nextInt(20); | |
doc.addField("abstract_t", paragraph.toString()); | |
int year = rand.nextInt(50); | |
int month = rand.nextInt(12); | |
int day = rand.nextInt(30); | |
double load = loadD.sample(); | |
double d = normal.sample(); | |
double d1 = serviceLevel.sample(); | |
double var1 = normal.sample(); | |
double var2 = (var1+100000)+noise.sample(); | |
float f = rand.nextFloat(); | |
double cA = componentA.sample(); | |
double cB = componentB.sample(); | |
int factor = 0; | |
int monthIndex = rand.nextInt(11); | |
if(monthIndex % 2 == 0) { | |
factor=10; | |
} else { | |
factor=-10; | |
} | |
int dayIndex = rand.nextInt(21); | |
double stockPrice = noise2.sample(); | |
double baseA = 15; | |
double baseB = 12; | |
doc.addField("stock_s", "stock_"+Integer.toString(year)); | |
doc.addField("a.stock_s", Integer.toString(year)); | |
doc.addField("price_i", Integer.toString((year*10)+factor)); | |
doc.addField("day_i", Integer.toString(day)); | |
doc.addField("load_d", Double.toString(load)); | |
doc.addField("response_d", Double.toString((d/50D)+(load*.11)+noise1.sample())); | |
doc.addField("eresponse_d", Double.toString((d/2000D)+(normal.cumulativeProbability(d)*100)+noise2.sample())); | |
doc.addField("filesize_d", Double.toString(d)); | |
doc.addField("filesize_td", Double.toString(d)); | |
doc.addField("service_d", Double.toString(d1)); | |
doc.addField("service_s", "level"+new Double(d1).intValue()); | |
doc.addField("var1_d", Double.toString(var1)); | |
doc.addField("var2_d", Double.toString(var2)); | |
doc.addField("price_f", Float.toString(f)); | |
doc.addField("tdate_dt", dates[dayIndex].replace("17", ho).replace("33", mi).replace("18", se)); | |
//doc.addField("loc_p", moveLat+","+moveLong); | |
doc.addField("stock_priceA_d", baseA+stockPrice); | |
doc.addField("stock_priceB_d", baseB+stockPrice+((double)dayIndex)); | |
doc.addField("componentA_d", cA); | |
doc.addField("componentB_d", cB); | |
doc.addField("prod_s", "product"+prod.sample()); | |
for(int l=0; l<5; l++) { | |
String ps = "product"+prod.sample(); | |
doc.addField("prod_ss",ps); | |
} | |
doc.addField("content_type_s", "parentDocument"); | |
doc.addField("group_s", doc.getFieldValue("id").toString()); | |
SolrInputDocument child = doc.deepCopy(); | |
child.setField("content_type_s", "childDocument"); | |
for(int c=0; c<50; c++) { | |
child.setField("id",UUID.randomUUID().toString()); | |
doc.addChildDocument(child); | |
child = child.deepCopy(); | |
} | |
request.add(doc); | |
//System.out.println("Doc Added"); | |
if(i % 1000 == 0) { | |
System.out.println("Sending batch..."); | |
request.process(client); | |
//client.commit(); | |
System.out.println("Batch sent"); | |
request = new UpdateRequest(); | |
} | |
} | |
System.out.println("Processing last batch"); | |
request.process(client); | |
client.commit(); | |
client.close(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment