Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Prepare data: define intervals and their lengths
// Split the recording into continuous periods by locating "jumps" (gaps) between
// consecutive timestamps, then describe each period as {start, end, window count}.
// NOTE(review): `times` is used here while `timestamps` is used below; neither is declared
// in this snippet - confirm both refer to the same RDD.
// NOTE(review): first() yields the earliest timestamp only if the RDD is sorted ascending - confirm.
Long firstElement = times.first();
// Sort descending into a single partition; the head is then the largest timestamp.
Long lastElement = times.sortBy(time -> time, false, 1).first();
// firstRDD drops the first timestamp and secondRDD drops the last one, so zipping them
// pairs every timestamp (pair._1) with its predecessor (pair._2).
// NOTE(review): JavaRDD.zip expects the same number of partitions and the same number of
// elements per partition on both sides; removing one element from opposite ends may break
// that when more than one partition is involved - verify.
JavaRDD<Long> firstRDD = timestamps.filter(record -> record > firstElement);
JavaRDD<Long> secondRDD = timestamps.filter(record -> record < lastElement);
// Compute the difference between each timestamp and its predecessor.
// A difference greater than 100 000 000 is treated as a jump between two distinct recording periods.
// The threshold was chosen because values are recorded every 50 000 000 and only around 0.05%
// of them are spaced by more than 100 000 000.
// Each resulting pair is (last timestamp before the jump, first timestamp after the jump).
JavaPairRDD<Long, Long> jumps = firstRDD.zip(secondRDD)
.mapToPair(pair -> new Tuple2<>(new Long[]{pair._1, pair._2}, pair._1 - pair._2))
.filter(pair -> pair._2 > 100000000)
.mapToPair(pair -> new Tuple2<>(pair._1[1], pair._1[0]));
// Now define the intervals: flatten every jump into its two boundary timestamps, sorted ascending.
// NOTE(review): this reads `tsJump`, not the `jumps` RDD built just above - confirm which one is intended.
List<Long> flatten = tsJump.flatMap(pair -> Arrays.asList(pair._1, pair._2))
.sortBy(t -> t, true, 1)
.collect();
List<Long[]> results = new ArrayList<>();
// NOTE(review): if `windows` is an integral type, the divisions below truncate before
// Math.round is applied - confirm `windows` is a floating-point value.
if (flatten.isEmpty()) {
// No jump detected: the whole recording is one continuous period.
// Without this guard flatten.get(0) would throw IndexOutOfBoundsException.
results.add(new Long[]{firstElement, lastElement, (long) Math.round((lastElement - firstElement) / windows)});
return results;
}
int size = flatten.size(); // always even: each jump contributes two boundaries
// init condition: from the first timestamp up to the start of the first jump
results.add(new Long[]{firstElement, flatten.get(0), (long) Math.round((flatten.get(0) - firstElement) / windows)});
// middle periods: from the end of one jump (odd index) to the start of the next one
for (int i = 1; i < size - 1; i+=2) {
results.add(new Long[]{flatten.get(i), flatten.get(i + 1), (long) Math.round((flatten.get(i + 1) - flatten.get(i)) / windows)});
}
// end condition: from the end of the last jump up to the last timestamp
results.add(new Long[]{flatten.get(size - 1), lastElement, (long) Math.round((lastElement - flatten.get(size - 1)) / windows)});
return results;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.