Created
February 4, 2025 19:19
-
-
Save awells10-chwy/d292364be9f365cb0a1525ca43e1a05a to your computer and use it in GitHub Desktop.
Query ORDER_SYNC events for last 24h and write order IDs to file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.chewy.orders360.api; | |
import org.elasticsearch.action.search.ClearScrollRequest; | |
import org.elasticsearch.action.search.ClearScrollResponse; | |
import org.elasticsearch.action.search.SearchRequest; | |
import org.elasticsearch.action.search.SearchResponse; | |
import org.elasticsearch.action.search.SearchScrollRequest; | |
import org.elasticsearch.client.RequestOptions; | |
import org.elasticsearch.client.RestClient; | |
import org.elasticsearch.client.RestHighLevelClient; | |
import org.elasticsearch.common.unit.TimeValue; | |
import org.elasticsearch.index.query.BoolQueryBuilder; | |
import org.elasticsearch.index.query.QueryBuilders; | |
import org.elasticsearch.search.Scroll; | |
import org.elasticsearch.search.SearchHit; | |
import org.elasticsearch.search.builder.SearchSourceBuilder; | |
import org.junit.jupiter.api.Test; | |
import java.io.BufferedWriter; | |
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
class GetUniqOrderIds { | |
@Test | |
void name() { | |
try (RestHighLevelClient client = new RestHighLevelClient( | |
RestClient.builder( | |
// If you're using TLS or multiple hosts, adjust accordingly | |
new org.apache.http.HttpHost("vpc-prd-use1-orders-360-search-3773ox762leqnuaim4cbnou4ty.us-east-1.es.amazonaws.com", 443, "https") | |
) | |
); | |
BufferedWriter bufferedWriter = Files.newBufferedWriter(Paths.get("orderIds.txt"))) { | |
// 2) Define a scroll context: "keep the search context alive for 1 minute" | |
final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L)); | |
// 3) Build the query: | |
// { | |
// "_source": "none", | |
// "size": 10000, | |
// "query": { | |
// "bool": { | |
// "must": [ | |
// { "range": { "startTime": { "gte": "now-24h" } } }, | |
// { "term": { "orderEvent.keyword": { "value": "ORDER_SYNC" } } } | |
// ] | |
// } | |
// } | |
// } | |
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery() | |
.must(QueryBuilders.rangeQuery("startTime").gte("now-24h")) | |
.must(QueryBuilders.termQuery("orderEvent.keyword", "ORDER_SYNC")); | |
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() | |
.query(boolQuery) | |
.fetchSource("orderId", null) | |
.size(10000); | |
SearchRequest searchRequest = new SearchRequest("order-event-data-2-2025"); | |
searchRequest.scroll(scroll); | |
searchRequest.source(searchSourceBuilder); | |
// 4) Execute the initial search | |
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); | |
// The scroll_id is returned by Elasticsearch | |
String scrollId = searchResponse.getScrollId(); | |
SearchHit[] searchHits = searchResponse.getHits().getHits(); | |
int totalProcessed = 0; | |
// 5) Loop until no more hits | |
while (searchHits != null && searchHits.length > 0) { | |
// Process this batch of hits | |
for (SearchHit hit : searchHits) { | |
// You have no _source (since fetchSource(false)), | |
// but you can still see doc ID, index, etc. | |
// If you want fields, set fetchSource(true) or fields(...) accordingly. | |
String orderId = hit.getSourceAsMap().get("orderId").toString(); | |
if (totalProcessed % 1000 == 0) { | |
System.out.println("Got doc ID: " + orderId); | |
} | |
bufferedWriter.write(orderId); | |
bufferedWriter.newLine(); | |
totalProcessed++; | |
} | |
// 6) Request the next batch using the scroll_id | |
SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); | |
scrollRequest.scroll(scroll); | |
// Get the next batch of hits | |
searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT); | |
scrollId = searchResponse.getScrollId(); | |
searchHits = searchResponse.getHits().getHits(); | |
} | |
// 7) Clear scroll context on the server to free resources | |
ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); | |
clearScrollRequest.addScrollId(scrollId); | |
ClearScrollResponse clearScrollResponse = | |
client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT); | |
System.out.println("Cleared scroll: " + clearScrollResponse.isSucceeded()); | |
System.out.println("Total docs processed: " + totalProcessed); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment