Created
May 4, 2023 15:51
-
-
Save its-snorlax/fd4fdcb931c991edc071e02822bcd3b3 to your computer and use it in GitHub Desktop.
Comparing the runtime of counting Datastore entities with and without an aggregation query
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.cloud.Tuple;
import com.google.cloud.datastore.AggregationQuery;
import com.google.cloud.datastore.AggregationResult;
import com.google.cloud.datastore.AggregationResults;
import com.google.cloud.datastore.Datastore;
import com.google.cloud.datastore.DatastoreOptions;
import com.google.cloud.datastore.Entity;
import com.google.cloud.datastore.Key;
import com.google.cloud.datastore.KeyFactory;
import com.google.cloud.datastore.KeyQuery;
import com.google.cloud.datastore.Query;
import com.google.cloud.datastore.QueryResults;
import com.google.cloud.datastore.aggregation.Aggregation;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.stream.StreamSupport;

public class App { | |
private static List<Tuple<Integer, Integer>> getBatches(int upperBound) { | |
int size = 100000; | |
int start = 0; | |
List<Tuple<Integer, Integer>> ranges = new ArrayList<>(); | |
while (true) { | |
Tuple<Integer, Integer> range = Tuple.of(start, Math.min(upperBound, start + size)); | |
ranges.add(range); | |
start = start + size + 1; | |
if (start > upperBound) { | |
break; | |
} | |
} | |
return ranges; | |
} | |
public static void createEntities(int size) { | |
Datastore datastore = DatastoreOptions.getDefaultInstance().getService(); | |
List<Tuple<Integer, Integer>> batches = getBatches(size); // getting batches of 100k | |
//saving 1000 entities | |
for (Tuple<Integer, Integer> batch : batches) { | |
List<Entity> entities = new ArrayList<>(size); | |
for (int i = batch.x(); i <= batch.y(); i++) { | |
Key key = datastore.newKeyFactory().setKind("RandomTask").newKey("sampleTask" + i); | |
Entity entity = Entity.newBuilder(key) | |
.set("description", randomString()) | |
.build(); | |
entities.add(entity); | |
} | |
datastore.put(entities.toArray(new Entity[]{})); | |
System.out.printf("record inserted %d to %d\n", batch.x(), batch.y()); | |
} | |
} | |
public static void rawCount() { | |
System.out.println("Key Only Query"); | |
Datastore datastore = DatastoreOptions.getDefaultInstance().getService(); | |
KeyQuery keyOnlyQuery = Query.newKeyQueryBuilder() | |
.setKind("RandomTask") | |
.build(); | |
Stopwatch stopWatch = Stopwatch.createStarted(); | |
QueryResults<Key> allKeys = datastore.run(keyOnlyQuery); | |
Iterable<Key> iterable = () -> allKeys; | |
Key[] keys = StreamSupport.stream(iterable.spliterator(), false).toArray(Key[]::new); | |
stopWatch.stop(); | |
System.out.println("total size: " + keys.length); | |
System.out.println( | |
"time taken for count (in milliseconds): " + stopWatch.elapsed(TimeUnit.MILLISECONDS)); | |
} | |
public static void aggregationQueryCount() { | |
System.out.println("Aggregation Query"); | |
Datastore datastore = DatastoreOptions.getDefaultInstance().getService(); | |
KeyQuery keyOnlyQuery = Query.newKeyQueryBuilder() | |
.setKind("RandomTask") | |
.build(); | |
AggregationQuery aggregationQuery = Query.newAggregationQueryBuilder() | |
.addAggregation(Aggregation.count().as("my_count")) | |
.over(keyOnlyQuery) | |
.build(); | |
Stopwatch stopWatch = Stopwatch.createStarted(); | |
AggregationResults aggregationResults = datastore.runAggregation(aggregationQuery); | |
AggregationResult aggregationResult = Iterables.getOnlyElement(aggregationResults); | |
stopWatch.stop(); | |
System.out.println("total size: " + aggregationResult.get("my_count")); | |
System.out.println( | |
"time taken for count (in milliseconds): " + stopWatch.elapsed(TimeUnit.MILLISECONDS)); | |
} | |
public static void deleteAll() { | |
Datastore datastore = DatastoreOptions.getDefaultInstance().getService(); | |
KeyQuery keyOnlyQuery = Query.newKeyQueryBuilder() | |
.setKind("RandomTask") | |
.build(); | |
QueryResults<Key> allKeys = datastore.run(keyOnlyQuery); | |
Key[] keysToDelete = ImmutableList.copyOf(allKeys).toArray(new Key[]{}); | |
datastore.delete(keysToDelete); | |
} | |
public static void main(String[] args) { | |
deleteAll(); | |
createEntities(1000112); | |
rawCount(); | |
System.out.println("---------------------------"); | |
aggregationQueryCount(); | |
} | |
private static String randomString() { | |
byte[] array = new byte[7]; // length is bounded by 7 | |
new Random().nextBytes(array); | |
return new String(array, Charset.forName("UTF-8")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment