Skip to content

Instantly share code, notes, and snippets.

@halitanildonmez
Last active March 26, 2024 21:07
Show Gist options
  • Save halitanildonmez/c9df99d3f35de7f5b0be6c5795b0caf7 to your computer and use it in GitHub Desktop.
Save halitanildonmez/c9df99d3f35de7f5b0be6c5795b0caf7 to your computer and use it in GitHub Desktop.
Another implementation for fast file reading
/**
 * Memory-maps the file at {@code TEST_FILE_PATH}, splits it into chunks whose
 * boundaries fall on line feeds, hands the chunks to {@code processFile} and
 * prints the aggregated statistics to standard output.
 *
 * <p>Chunk convention: every chunk is the half-open range {@code [start, end)}
 * where {@code end} is either the index of a {@code '\n'} byte or the file size.
 * The following chunk starts at that same index, i.e. it may begin on the
 * newline byte itself.
 *
 * @throws IOException if the file cannot be opened, sized or mapped
 */
private static void readFile() throws IOException {
    List<StartEndRecord> records = new ArrayList<>();
    long numberOfChunks = 3L;
    // The mapping lives in a shared arena so it is unmapped as soon as this block exits
    // (Arena.global() would keep the file mapped for the lifetime of the JVM). It has to be
    // a shared arena, not a confined one, because processFile reads the segment from pool threads.
    try (RandomAccessFile file = new RandomAccessFile(TEST_FILE_PATH, "r");
         Arena arena = Arena.ofShared()) {
        FileChannel channel = file.getChannel();
        long fileSize = channel.size();
        MemorySegment map = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, arena);
        long chunkSize = fileSize / numberOfChunks;
        long start = 0;
        for (long i = 0; i < numberOfChunks; i++) {
            // Start from the nominal boundary and move forward to the next line feed
            // so that no line is split across two chunks.
            long endCandidate = start + chunkSize;
            while (endCandidate < fileSize && map.get(ValueLayout.JAVA_BYTE, endCandidate) != '\n') {
                endCandidate++;
            }
            long end = Math.min(endCandidate, fileSize);
            records.add(new StartEndRecord(start, end));
            start = end;
        }
        // processFile returns only after its worker pool has terminated, so nothing
        // touches the segment any more when the arena is closed.
        Map<String, DoubleSummaryStatistics> stringDoubleSummaryStatisticsMap = processFile((int) numberOfChunks, records, map);
        System.out.println(stringDoubleSummaryStatisticsMap);
    }
}
/**
 * Parses every chunk of the mapped file on a fixed thread pool and merges the
 * per-chunk results into one map of statistics keyed by name.
 *
 * <p>Each worker fills its own private map; the maps are merged on the calling
 * thread after the corresponding task has finished, so no map or
 * {@link DoubleSummaryStatistics} instance is ever mutated by two threads.
 *
 * @param numChunks size of the worker pool (values below 1 are treated as 1)
 * @param records   byte ranges {@code [chunkStart, chunkEnd)} to parse
 * @param map       memory segment holding the file content
 * @return statistics of all values seen for each name
 * @throws IllegalStateException if a worker fails (for example on an unparsable
 *                               number) or the calling thread is interrupted
 */
private static Map<String, DoubleSummaryStatistics> processFile (int numChunks, List<StartEndRecord> records, MemorySegment map) {
    Map<String, DoubleSummaryStatistics> valueMap = new HashMap<>();
    if (records.isEmpty()) {
        return valueMap;
    }
    // ExecutorService.close() (invoked by try-with-resources) shuts the pool down and
    // waits for the submitted tasks, so no explicit shutdown/awaitTermination is needed.
    try (ExecutorService service = Executors.newFixedThreadPool(Math.max(1, numChunks))) {
        var pending = records.stream()
                .map(r -> service.submit(() -> parseChunk(map, r.chunkStart, r.chunkEnd)))
                .toList();
        for (var future : pending) {
            // Future.get() also guarantees visibility of everything the worker wrote.
            future.get().forEach((name, stats) ->
                    valueMap.merge(name, stats, (left, right) -> {
                        left.combine(right);
                        return left;
                    }));
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IllegalStateException("Interrupted while processing chunks", e);
    } catch (java.util.concurrent.ExecutionException e) {
        throw new IllegalStateException("Failed to process a chunk", e.getCause());
    }
    return valueMap;
}

/**
 * Parses the {@code name;value} lines found in {@code [chunkStart, chunkEnd)} and
 * records each value exactly once under its name.
 *
 * <p>The name is everything before the first {@code ';'} of a line, the value is
 * everything after it up to the line feed. Lines that contain no {@code ';'} are
 * skipped; this covers empty lines and the newline byte a chunk may start on.
 */
private static Map<String, DoubleSummaryStatistics> parseChunk(MemorySegment map, long chunkStart, long chunkEnd) {
    Map<String, DoubleSummaryStatistics> chunkStats = new HashMap<>();
    long lineStart = chunkStart;
    while (lineStart < chunkEnd) {
        long separator = -1;
        long lineEnd = lineStart;
        // Bounds are checked before every read, so the scan never leaves the chunk.
        while (lineEnd < chunkEnd) {
            byte current = map.get(ValueLayout.JAVA_BYTE, lineEnd);
            if (current == '\n') {
                break;
            }
            if (current == ';' && separator < 0) {
                separator = lineEnd;
            }
            lineEnd++;
        }
        if (separator >= 0) {
            String name = decodeUtf8(map, lineStart, separator);
            // Double.parseDouble ignores surrounding whitespace, so a trailing '\r' is harmless.
            double value = Double.parseDouble(decodeUtf8(map, separator + 1, lineEnd));
            chunkStats.computeIfAbsent(name, key -> new DoubleSummaryStatistics()).accept(value);
        }
        // Step over the line feed so the next name does not start with '\n'.
        lineStart = lineEnd + 1;
    }
    return chunkStats;
}

/** Decodes the bytes in {@code [from, to)} as UTF-8 instead of casting single bytes to chars. */
private static String decodeUtf8(MemorySegment map, long from, long to) {
    byte[] raw = map.asSlice(from, to - from).toArray(ValueLayout.JAVA_BYTE);
    return new String(raw, java.nio.charset.StandardCharsets.UTF_8);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment