Last active
March 26, 2024 21:07
-
-
Save halitanildonmez/c9df99d3f35de7f5b0be6c5795b0caf7 to your computer and use it in GitHub Desktop.
Another implementation for fast file reading
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private static void readFile() throws IOException { | |
List<StartEndRecord> records = new ArrayList<>(); | |
long numberOfChunks = 3l; | |
try (RandomAccessFile file = new RandomAccessFile(TEST_FILE_PATH, "r")) { | |
FileChannel channel = file.getChannel(); | |
long fileSize = channel.size(); | |
MemorySegment map = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.global()); | |
long start = 0; | |
long chunkSize = fileSize / numberOfChunks; | |
for (long i = 0; i < numberOfChunks; i++) { | |
long endCandidate = start + chunkSize; | |
while (endCandidate < fileSize && map.get(ValueLayout.JAVA_BYTE, endCandidate) != '\n') { | |
endCandidate++; | |
} | |
records.add(new StartEndRecord(start, Math.min(endCandidate, fileSize))); | |
start = Math.min(endCandidate, fileSize); | |
} | |
Map<String, DoubleSummaryStatistics> stringDoubleSummaryStatisticsMap = processFile((int) numberOfChunks, records, map); | |
System.out.println(stringDoubleSummaryStatisticsMap); | |
} | |
} | |
private static Map<String, DoubleSummaryStatistics> processFile (int numChunks, List<StartEndRecord> records, MemorySegment map) { | |
Map<String, DoubleSummaryStatistics> valueMap = new HashMap<>(); | |
try (ExecutorService service = Executors.newFixedThreadPool(numChunks)) { | |
records.parallelStream().forEach(r -> { | |
service.execute(() -> { | |
long chunkStart = r.chunkStart; | |
long chunkEnd = r.chunkEnd; | |
long index = chunkStart; | |
String name = ""; | |
String number = ""; | |
// TODO: for the reader, this could and should be extracted to another method | |
while (index < chunkEnd) { | |
char currentChar = (char)map.get(ValueLayout.JAVA_BYTE, index); | |
while (currentChar != ';' && index < chunkEnd) { | |
name += currentChar; | |
index++; | |
currentChar = (char)map.get(ValueLayout.JAVA_BYTE, index); | |
} | |
index++; | |
if (index >= chunkEnd) { | |
break; | |
} | |
currentChar = (char)map.get(ValueLayout.JAVA_BYTE, index); | |
while (currentChar != '\n') { | |
number += currentChar; | |
index++; | |
if (index >= chunkEnd) { | |
break; | |
} | |
currentChar = (char)map.get(ValueLayout.JAVA_BYTE, index); | |
} | |
double d = Double.parseDouble(number); | |
DoubleSummaryStatistics stats = valueMap.containsKey(name) ? valueMap.get(name) : new DoubleSummaryStatistics(); | |
if (valueMap.containsKey(name)) { | |
stats.accept(d); | |
} else { | |
stats.accept(d); | |
} | |
stats.accept(d); | |
valueMap.put(name, stats); | |
name = ""; | |
number = ""; | |
} | |
}); | |
}); | |
try { | |
service.shutdown(); | |
while (!service.awaitTermination(3, TimeUnit.MINUTES)) { | |
System.out.println("Not terminated yet"); | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
return valueMap; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment