Skip to content

Instantly share code, notes, and snippets.

@kosmosr
Created December 24, 2020 16:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kosmosr/237c94573cdf1813daaa1f0250ad8e45 to your computer and use it in GitHub Desktop.
Save kosmosr/237c94573cdf1813daaa1f0250ad8e45 to your computer and use it in GitHub Desktop.
package top.mollysu;
import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
 * Splits a CSV file found on the classpath into several smaller files, one per worker thread.
 *
 * <p>The source file is divided into {@code count} byte ranges of roughly equal size (for example,
 * a 20000-byte file split 10 ways yields ranges of about 2000 bytes). Every range boundary is moved
 * forward to the start of the next line so that no line is cut in half. Each range is then copied
 * by its own task into {@code <fileName>_<index>.csv} in the current working directory.
 */
public class CsvUtil {

    /**
     * Counts the lines of {@code file} by skipping through it with a {@link LineNumberReader}.
     *
     * @param file the file to inspect
     * @return the number of line terminators read plus one, or {@code -1} if the file cannot be read
     */
    private static int getLineNumber(File file) {
        try (LineNumberReader reader = new LineNumberReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            reader.skip(Long.MAX_VALUE);
            return reader.getLineNumber() + 1;
        } catch (IOException e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Computes the byte offsets at which each of the {@code count} chunks starts.
     *
     * <p>The first chunk always starts at offset 0. Every later chunk starts right after the first
     * {@code '\n'} found at or after {@code i * (length / count)}; if the file ends before a newline
     * is found the chunk starts at end-of-file and is therefore empty. The offsets are
     * non-decreasing. The file size, the nominal chunk size and the first line of every non-empty
     * chunk are printed to standard output as progress information.
     *
     * @param file  the file to partition
     * @param count the number of chunks; must be positive
     * @return a list of {@code count} start offsets
     * @throws IllegalArgumentException if {@code count} is {@code null} or not positive
     * @throws IOException              if the file cannot be opened or read
     */
    private static List<Long> getRange(File file, Integer count) throws IOException {
        if (count == null || count <= 0) {
            throw new IllegalArgumentException("count must be positive: " + count);
        }
        long length = file.length();
        long chunkSize = length / count;
        System.out.println("length:" + length);
        System.out.println("maxLineLength:" + chunkSize);

        List<Long> range = new ArrayList<>(count);
        range.add(0L);
        try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
            for (int i = 1; i < count; i++) {
                raf.seek(i * chunkSize);
                // read() reports end-of-file as -1 instead of throwing, so a chunk that
                // contains no further newline simply ends up starting at end-of-file.
                int b;
                do {
                    b = raf.read();
                } while (b != -1 && b != '\n');
                // The pointer already sits on the first byte after the newline.
                range.add(raf.getFilePointer());
            }
            for (long offset : range) {
                raf.seek(offset);
                String raw = raf.readLine();
                if (raw != null) {
                    // readLine() maps each byte to one char; recover the bytes and decode as UTF-8.
                    System.out.println(
                            new String(raw.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8));
                }
            }
        }
        return range;
    }

    /**
     * Splits the classpath resource {@code fileName} into {@code count} files using {@code count}
     * worker threads and waits until every chunk has been written.
     *
     * @param fileName classpath-relative name of the CSV file to split
     * @param count    the number of chunks (and worker threads); must be positive
     * @throws URISyntaxException       if the resource URL cannot be converted to a URI
     * @throws IOException              if the resource is missing or a chunk cannot be read or written
     * @throws InterruptedException     if the calling thread is interrupted while waiting
     * @throws IllegalArgumentException if {@code count} is not positive
     */
    public static void read(String fileName, int count) throws URISyntaxException, IOException, InterruptedException {
        File file = locate(fileName);
        List<Long> range = getRange(file, count);
        System.out.println(range);
        long fileLength = file.length();

        ExecutorService service = Executors.newFixedThreadPool(count);
        try {
            List<Future<Long>> pending = new ArrayList<>(count);
            for (int i = 0; i < count; i++) {
                final int index = i;
                final long begin = range.get(i);
                // The last chunk runs to end-of-file; every other chunk stops where the next begins.
                final long end = (i == count - 1) ? fileLength : range.get(i + 1);
                Future<Long> task = service.submit(() -> {
                    return getWrite(fileName, index, begin, end);
                });
                pending.add(task);
            }
            for (Future<Long> task : pending) {
                awaitChunk(task);
            }
        } finally {
            // Always release the worker threads; otherwise they keep the JVM alive.
            service.shutdownNow();
        }
    }

    /**
     * Waits for one chunk task and rethrows its failure, if any, in the calling thread.
     */
    private static void awaitChunk(Future<Long> task)
            throws URISyntaxException, IOException, InterruptedException {
        try {
            task.get();
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof UncheckedIOException) {
                throw ((UncheckedIOException) cause).getCause();
            }
            if (cause instanceof URISyntaxException) {
                throw (URISyntaxException) cause;
            }
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            }
            if (cause instanceof Error) {
                throw (Error) cause;
            }
            throw new IOException("Chunk task failed", cause);
        }
    }

    /**
     * Resolves a classpath resource to a file on disk.
     */
    private static File locate(String fileName) throws URISyntaxException, FileNotFoundException {
        URL resource = CsvUtil.class.getClassLoader().getResource(fileName);
        if (resource == null) {
            throw new FileNotFoundException("Classpath resource not found: " + fileName);
        }
        return new File(resource.toURI());
    }

    /**
     * Copies the lines of the classpath resource {@code fileName} that start in the byte range
     * {@code [begin, end)} into {@code <fileName>_<index>.csv} in the current working directory.
     *
     * <p>Lines are copied byte-for-byte, so any encoding is preserved; every line terminator is
     * written as a single {@code '\n'}. An existing output file is overwritten. A line that starts
     * before {@code end} is always copied completely, even if it extends past {@code end}.
     *
     * @param fileName classpath-relative name of the source CSV file
     * @param index    chunk index, used in the output file name
     * @param begin    offset of the first byte to copy; expected to be the start of a line
     * @param end      offset at which copying stops (exclusive)
     * @return the source file offset directly after the last line that was copied
     * @throws URISyntaxException   if the resource URL cannot be converted to a URI
     * @throws UncheckedIOException if the resource is missing or reading or writing fails
     */
    public static long getWrite(String fileName, int index, long begin, long end) throws URISyntaxException {
        System.out.println("index: " + index);
        try {
            File file = locate(fileName);
            File target = new File(fileName + "_" + index + ".csv");
            try (RandomAccessFile in = new RandomAccessFile(file, "r");
                 OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
                in.seek(begin);
                // Strictly "<": a line starting exactly at 'end' belongs to the next chunk.
                while (in.getFilePointer() < end) {
                    String raw = in.readLine();
                    if (raw == null) {
                        break; // end-of-file reached before 'end'
                    }
                    // readLine() maps each byte to one char, so ISO-8859-1 restores the raw bytes.
                    out.write(raw.getBytes(StandardCharsets.ISO_8859_1));
                    out.write('\n');
                }
                return in.getFilePointer();
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Splits {@code stock_daily_basic.csv} from the classpath into 10 chunk files.
     */
    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {
        CsvUtil.read("stock_daily_basic.csv", 10);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment