Skip to content

Instantly share code, notes, and snippets.

@awells10-chwy
Created February 4, 2025 19:19
Show Gist options
  • Save awells10-chwy/d292364be9f365cb0a1525ca43e1a05a to your computer and use it in GitHub Desktop.
Save awells10-chwy/d292364be9f365cb0a1525ca43e1a05a to your computer and use it in GitHub Desktop.
Query ORDER_SYNC events for last 24h and write order IDs to file
package com.chewy.orders360.api;
import org.elasticsearch.action.search.ClearScrollRequest;
import org.elasticsearch.action.search.ClearScrollResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.junit.jupiter.api.Test;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * One-off export, packaged as a JUnit test so it can be run from the IDE/build tool.
 *
 * <p>Scrolls through every {@code ORDER_SYNC} event of the last 24 hours in the
 * {@code order-event-data-2-2025} index and writes the {@code orderId} of each hit,
 * one per line, to {@code orderIds.txt} in the working directory.
 *
 * <p>NOTE: one line is written per matching event, so the file may contain the same
 * order ID more than once; de-duplicate downstream (e.g. {@code sort -u}) if unique
 * IDs are required.
 */
class GetUniqOrderIds {

    /** Number of hits requested per scroll page. */
    private static final int PAGE_SIZE = 10000;

    /** Name of the only {@code _source} field that is fetched and exported. */
    private static final String ORDER_ID_FIELD = "orderId";

    /**
     * Runs the export.
     *
     * @throws IOException if talking to Elasticsearch or writing the output file fails;
     *     propagated so that the test fails instead of silently leaving a partial file
     */
    @Test
    void name() throws IOException {
        try (RestHighLevelClient client = new RestHighLevelClient(
                     RestClient.builder(
                             // If you're using TLS or multiple hosts, adjust accordingly
                             new org.apache.http.HttpHost(
                                     "vpc-prd-use1-orders-360-search-3773ox762leqnuaim4cbnou4ty.us-east-1.es.amazonaws.com",
                                     443,
                                     "https")));
             BufferedWriter bufferedWriter = Files.newBufferedWriter(Paths.get("orderIds.txt"))) {

            // Keep the server-side search context alive for 1 minute between pages.
            final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));

            // Equivalent request body:
            // {
            //   "_source": "orderId",
            //   "size": 10000,
            //   "query": {
            //     "bool": {
            //       "must": [
            //         { "range": { "startTime": { "gte": "now-24h" } } },
            //         { "term": { "orderEvent.keyword": { "value": "ORDER_SYNC" } } }
            //       ]
            //     }
            //   }
            // }
            BoolQueryBuilder boolQuery = QueryBuilders.boolQuery()
                    .must(QueryBuilders.rangeQuery("startTime").gte("now-24h"))
                    .must(QueryBuilders.termQuery("orderEvent.keyword", "ORDER_SYNC"));
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                    .query(boolQuery)
                    // Only the order ID is needed, so restrict _source to that one field.
                    .fetchSource(ORDER_ID_FIELD, null)
                    .size(PAGE_SIZE);
            SearchRequest searchRequest = new SearchRequest("order-event-data-2-2025");
            searchRequest.scroll(scroll);
            searchRequest.source(searchSourceBuilder);

            String scrollId = null;
            int totalProcessed = 0;
            int skipped = 0;
            try {
                // Initial search; the response carries the scroll_id used for paging.
                SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
                scrollId = searchResponse.getScrollId();
                SearchHit[] searchHits = searchResponse.getHits().getHits();

                // Page until a batch comes back empty.
                while (searchHits != null && searchHits.length > 0) {
                    for (SearchHit hit : searchHits) {
                        java.util.Map<String, Object> source = hit.getSourceAsMap();
                        Object orderIdValue = source == null ? null : source.get(ORDER_ID_FIELD);
                        if (orderIdValue == null) {
                            // A hit without an orderId cannot be exported; count it rather
                            // than aborting the whole run with a NullPointerException.
                            skipped++;
                            continue;
                        }
                        String orderId = orderIdValue.toString();
                        // Progress indicator: print a sample ID every 1000 written lines.
                        if (totalProcessed % 1000 == 0) {
                            System.out.println("Got doc ID: " + orderId);
                        }
                        bufferedWriter.write(orderId);
                        bufferedWriter.newLine();
                        totalProcessed++;
                    }

                    // Request the next page; the scroll_id may change between pages.
                    SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
                    scrollRequest.scroll(scroll);
                    searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT);
                    scrollId = searchResponse.getScrollId();
                    searchHits = searchResponse.getHits().getHits();
                }
            } finally {
                // Always release the server-side scroll context, even when paging or
                // writing failed part-way through.
                clearScroll(client, scrollId);
            }

            System.out.println("Total docs processed: " + totalProcessed);
            if (skipped > 0) {
                System.out.println("Hits skipped (no " + ORDER_ID_FIELD + "): " + skipped);
            }
        }
    }

    /**
     * Best-effort release of a scroll context on the server.
     *
     * <p>Failures are reported on stderr but not rethrown: this runs from a
     * {@code finally} block and must not mask the exception that aborted the export.
     *
     * @param client   client used to issue the clear-scroll request
     * @param scrollId scroll context to clear; {@code null} when the initial search
     *                 never returned one, in which case nothing is done
     */
    private static void clearScroll(RestHighLevelClient client, String scrollId) {
        if (scrollId == null) {
            return;
        }
        ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
        clearScrollRequest.addScrollId(scrollId);
        try {
            ClearScrollResponse clearScrollResponse =
                    client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
            System.out.println("Cleared scroll: " + clearScrollResponse.isSucceeded());
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to clear scroll context: " + e);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment