Skip to content

Instantly share code, notes, and snippets.

@halitanildonmez
Last active March 26, 2024 19:55
Show Gist options
  • Save halitanildonmez/2255dd6852f16eaa88c84dd78ac7c076 to your computer and use it in GitHub Desktop.
Save halitanildonmez/2255dd6852f16eaa88c84dd78ac7c076 to your computer and use it in GitHub Desktop.
A file reader that reads a large file into memory
// Shared Unsafe handle, obtained once at class initialization via initUnsafe();
// the rest of this file uses it to read single bytes at raw memory addresses.
private static final Unsafe UNSAFE = initUnsafe();
/**
 * Looks up the {@code Unsafe} singleton by reflectively reading the private static
 * {@code theUnsafe} field of the {@code Unsafe} class.
 *
 * @return the value stored in {@code Unsafe.theUnsafe}
 * @throws RuntimeException wrapping the reflective failure when the field does not
 *         exist or cannot be read
 */
private static Unsafe initUnsafe() {
    final Class<Unsafe> holder = Unsafe.class;
    try {
        final Field singletonField = holder.getDeclaredField("theUnsafe");
        // The field is private, so access checks must be lifted before reading it.
        singletonField.setAccessible(true);
        final Object instance = singletonField.get(holder);
        return (Unsafe) instance;
    } catch (NoSuchFieldException | IllegalAccessException reflectionFailure) {
        throw new RuntimeException(reflectionFailure);
    }
}
/**
 * Memory-maps the file at {@code TEST_FILE_PATH}, splits it into up to three chunks that
 * each end on a line boundary, parses the chunks in parallel and prints the collected
 * per-name {@link DoubleSummaryStatistics} followed by the elapsed wall-clock time.
 *
 * <p>Every line is expected to have the form {@code name;number}. Lines that contain no
 * {@code ';'} (for example blank lines) are skipped. Names are decoded as UTF-8.
 *
 * @throws IOException declared for callers; failures while opening or mapping the file
 *         are rethrown wrapped in a {@link RuntimeException}
 * @throws RuntimeException if the file cannot be mapped, a worker fails (for example on
 *         an unparsable number), or the calling thread is interrupted while waiting
 */
private static void testLargeChunk() throws IOException {
    record StartEndRecord(long chunkStart, long chunkEnd) {}

    final int numberOfChunks = 3;
    final long startTime = System.currentTimeMillis();
    final List<StartEndRecord> records = new ArrayList<>();

    try (RandomAccessFile file = new RandomAccessFile(TEST_FILE_PATH, "r");
         FileChannel channel = file.getChannel()) {
        final long size = channel.size();
        if (size > 0) {
            // The global arena is never closed, so the raw addresses handed to the workers
            // stay valid after the channel itself has been closed.
            MemorySegment map = channel.map(FileChannel.MapMode.READ_ONLY, 0, size, Arena.global());
            final long start = map.address();
            final long end = start + size;
            final long segmentSize = size / numberOfChunks;

            long s = start;
            for (int i = 0; i < numberOfChunks && s < end; i++) {
                long chunkEnd;
                if (i == numberOfChunks - 1) {
                    // The last chunk takes whatever is left, including the division remainder.
                    chunkEnd = end;
                } else {
                    chunkEnd = Math.min(s + segmentSize, end);
                    // Move forward to the next newline; the bound is checked before every
                    // read so the mapping is never read past its end.
                    while (chunkEnd < end && UNSAFE.getByte(chunkEnd) != '\n') {
                        chunkEnd++;
                    }
                    if (chunkEnd < end) {
                        // Keep the newline in this chunk so the next one starts on a fresh line.
                        chunkEnd++;
                    }
                }
                records.add(new StartEndRecord(s, chunkEnd));
                s = chunkEnd;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    final Map<String, DoubleSummaryStatistics> valueMap = new HashMap<>();
    try (ExecutorService service = Executors.newFixedThreadPool(numberOfChunks)) {
        // Each worker fills its own map; results are merged on this thread only, so no
        // map is ever mutated by more than one thread.
        var pending = new ArrayList<java.util.concurrent.Future<Map<String, DoubleSummaryStatistics>>>();
        for (StartEndRecord chunk : records) {
            pending.add(service.submit(() -> parseChunk(chunk.chunkStart(), chunk.chunkEnd())));
        }
        service.shutdown();
        try {
            while (!service.awaitTermination(3, TimeUnit.MINUTES)) {
                System.out.println("Not terminated");
            }
            for (var result : pending) {
                result.get().forEach((name, stats) ->
                        valueMap.merge(name, stats, (merged, extra) -> {
                            merged.combine(extra);
                            return merged;
                        }));
            }
        } catch (InterruptedException e) {
            // Restore the flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (java.util.concurrent.ExecutionException e) {
            throw new RuntimeException(e.getCause());
        }
    }

    System.out.println(valueMap);
    System.out.println("Total read and print time: " + (System.currentTimeMillis() - startTime) + " millis. ");
}

/** Parses every {@code name;number} line stored in the address range {@code [chunkStart, chunkEnd)}. */
private static Map<String, DoubleSummaryStatistics> parseChunk(long chunkStart, long chunkEnd) {
    Map<String, DoubleSummaryStatistics> stats = new HashMap<>();
    long lineStart = chunkStart;
    while (lineStart < chunkEnd) {
        long lineEnd = lineStart;
        while (lineEnd < chunkEnd && UNSAFE.getByte(lineEnd) != '\n') {
            lineEnd++;
        }
        long separator = lineStart;
        while (separator < lineEnd && UNSAFE.getByte(separator) != ';') {
            separator++;
        }
        if (separator < lineEnd) {
            String name = decodeUtf8(lineStart, separator);
            double value = Double.parseDouble(decodeUtf8(separator + 1, lineEnd));
            // Exactly one accept per measurement.
            stats.computeIfAbsent(name, key -> new DoubleSummaryStatistics()).accept(value);
        }
        // Step over the newline (or past the chunk end, which terminates the loop).
        lineStart = lineEnd + 1;
    }
    return stats;
}

/** Copies the bytes in the address range {@code [from, to)} and decodes them as UTF-8. */
private static String decodeUtf8(long from, long to) {
    byte[] bytes = new byte[Math.toIntExact(to - from)];
    for (int i = 0; i < bytes.length; i++) {
        bytes[i] = UNSAFE.getByte(from + i);
    }
    return new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment