Last active
March 26, 2024 19:55
-
-
Save halitanildonmez/2255dd6852f16eaa88c84dd78ac7c076 to your computer and use it in GitHub Desktop.
A file reader that reads a large file into memory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private static final Unsafe UNSAFE = initUnsafe(); | |
private static Unsafe initUnsafe() { | |
try { | |
Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); | |
theUnsafe.setAccessible(true); | |
return (Unsafe) theUnsafe.get(Unsafe.class); | |
} | |
catch (NoSuchFieldException | IllegalAccessException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
private static void testLargeChunk() throws IOException { | |
record StartEndRecord (long chunkStart, long chunkEnd) {} | |
List<StartEndRecord> records = new ArrayList<>(); | |
long startTime = System.currentTimeMillis(); | |
long numberOfChunks = 3l; | |
Map<String, DoubleSummaryStatistics> valueMap = new HashMap<>(); | |
long [] chunkStarts = new long[(int)numberOfChunks]; | |
long [] chunkEnds = new long[(int)numberOfChunks]; | |
FileChannel channel = null; | |
try (RandomAccessFile file = new RandomAccessFile(TEST_FILE_PATH, "r")) { | |
channel = file.getChannel(); | |
long size = channel.size(); | |
MemorySegment map = channel.map(FileChannel.MapMode.READ_ONLY, 0, size, Arena.global()); | |
final long start = map.address(); | |
final long end = start + size; | |
long segmentSize = size / numberOfChunks; | |
long s = start; | |
for (long i = 0; i < numberOfChunks; i++) { | |
chunkStarts[(int)i] = s; | |
long endCandidate = s + segmentSize; | |
byte b = UNSAFE.getByte(endCandidate); | |
while (b != '\n' && endCandidate < end) { | |
endCandidate++; | |
b = UNSAFE.getByte(endCandidate); | |
} | |
chunkEnds[(int)i] = endCandidate; | |
records.add(new StartEndRecord(s, endCandidate)); | |
s = Math.min(endCandidate, end); | |
} | |
} catch (Exception e) { | |
throw new RuntimeException(e); | |
} finally { | |
if (channel != null) | |
channel.close(); | |
} | |
try (ExecutorService service = Executors.newFixedThreadPool((int)numberOfChunks)) { | |
for (StartEndRecord record : records) { | |
service.execute(() -> { | |
long chunkStart = record.chunkStart; | |
long chunkEnd = record.chunkEnd; | |
long x = chunkStart; | |
String name = ""; | |
String number = ""; | |
while (x < chunkEnd) { | |
char c = (char)UNSAFE.getByte(x); | |
while (c != ';' && x < chunkEnd) { | |
name += c; | |
x++; | |
c = (char)UNSAFE.getByte(x); | |
} | |
x++; | |
if (x >= chunkEnd) { | |
break; | |
} | |
c = (char)UNSAFE.getByte(x); | |
while (c != '\n') { | |
number += c; | |
x++; | |
c = (char)UNSAFE.getByte(x); | |
} | |
double d = Double.parseDouble(number); | |
DoubleSummaryStatistics stats = valueMap.containsKey(name) ? valueMap.get(name) : | |
new DoubleSummaryStatistics(); | |
if (valueMap.containsKey(name)) { | |
stats.accept(d); | |
} else { | |
stats.accept(d); | |
} | |
stats.accept(d); | |
valueMap.put(name, stats); | |
name = ""; | |
number = ""; | |
} | |
}); | |
} | |
try { | |
service.shutdown(); | |
while (!service.awaitTermination(3, TimeUnit.MINUTES)) { | |
System.out.println("Not terminated"); | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
System.out.println(valueMap); | |
System.out.println("Total read and print time: " + (System.currentTimeMillis() - startTime) + " millis. "); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment