Skip to content

Instantly share code, notes, and snippets.

@haruo31
Created December 6, 2016 17:21
Show Gist options
  • Save haruo31/c062fa4e00920a6e8d5517f5ae0a5e83 to your computer and use it in GitHub Desktop.
Save haruo31/c062fa4e00920a6e8d5517f5ae0a5e83 to your computer and use it in GitHub Desktop.
Sample Java project
$ java -Xmx140M -jar target/sample-0.0.1-SNAPSHOT.jar mecab-ipadic-neologd-master/seed/mecab-user-dict-seed.20161205.csv.xz
01:04:44.734 [pool-1-thread-1] INFO sample.SampleMain - In: 405, Out: 389, Count: 29736, Total: 134217728, Free: 33947360
01:04:47.731 [pool-1-thread-1] INFO sample.SampleMain - In: 873, Out: 857, Count: 64788, Total: 134217728, Free: 33947360
01:04:50.728 [pool-1-thread-1] INFO sample.SampleMain - In: 1316, Out: 1300, Count: 96759, Total: 134217728, Free: 33947360
01:04:53.732 [pool-1-thread-1] INFO sample.SampleMain - In: 1788, Out: 1772, Count: 131953, Total: 134217728, Free: 33947360
01:04:56.731 [pool-1-thread-1] INFO sample.SampleMain - In: 2242, Out: 2226, Count: 165813, Total: 134217728, Free: 33947360
01:04:59.732 [pool-1-thread-1] INFO sample.SampleMain - In: 2698, Out: 2682, Count: 199371, Total: 134217728, Free: 33947360
01:05:02.728 [pool-1-thread-1] INFO sample.SampleMain - In: 3155, Out: 3139, Count: 234133, Total: 134217728, Free: 33947360
01:05:05.729 [pool-1-thread-1] INFO sample.SampleMain - In: 3633, Out: 3617, Count: 270345, Total: 134217728, Free: 33947360
01:05:08.728 [pool-1-thread-1] INFO sample.SampleMain - In: 4083, Out: 4067, Count: 303688, Total: 134217728, Free: 33947360
01:05:11.728 [pool-1-thread-1] INFO sample.SampleMain - In: 4541, Out: 4525, Count: 339285, Total: 134217728, Free: 33803576
01:05:14.728 [pool-1-thread-1] INFO sample.SampleMain - In: 5009, Out: 4993, Count: 374753, Total: 134217728, Free: 33718512
01:05:17.730 [pool-1-thread-1] INFO sample.SampleMain - In: 5491, Out: 5475, Count: 410735, Total: 134217728, Free: 33718512
01:05:20.730 [pool-1-thread-1] INFO sample.SampleMain - In: 5955, Out: 5939, Count: 446729, Total: 134217728, Free: 33718512
01:05:23.730 [pool-1-thread-1] INFO sample.SampleMain - In: 6414, Out: 6398, Count: 482890, Total: 134217728, Free: 33718512
01:05:26.728 [pool-1-thread-1] INFO sample.SampleMain - In: 6866, Out: 6850, Count: 518372, Total: 134217728, Free: 33718512
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor for the sample: compiles the sources and packages them, together with
     their dependencies, into a single jar whose manifest names sample.SampleMain as main class. -->
<modelVersion>4.0.0</modelVersion>
<groupId>sample</groupId>
<artifactId>sample</artifactId>
<version>0.0.1-SNAPSHOT</version>
<build>
<!-- Sources are read from ./src instead of Maven's default src/main/java. -->
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<!-- Compile as Java 8 (source and target level 1.8). -->
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<!-- Run the shade goal during the package phase to produce one self-contained jar. -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<!-- Sets the mainClass entry in the jar manifest so the result can be started with "java -jar". -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>sample.SampleMain</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<properties>
<!-- Source files are read as UTF-8 regardless of the platform default encoding. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- XZ decompression (the code uses org.tukaani.xz.XZInputStream). -->
<dependency>
<groupId>org.tukaani</groupId>
<artifactId>xz</artifactId>
<version>1.6</version>
</dependency>
<!-- Logging backend; the code logs through the org.slf4j API. -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.1.7</version>
</dependency>
</dependencies>
</project>
package sample;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.util.Random;
import java.util.concurrent.Executors;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tukaani.xz.XZInputStream;
/**
 * Sample that streams an XZ-compressed text file line by line through a parallel
 * {@code Stream} whose consumer is deliberately slow, while a background task logs
 * throughput counters and JVM heap figures every few seconds.
 *
 * <p>Usage: {@code java -jar sample.jar <input.csv.xz>}
 */
public class SampleMain {
    static final Logger logger = LoggerFactory.getLogger(SampleMain.class);

    /** Number of lines that have entered the {@code map} stage. */
    static final LongAdder in = new LongAdder();

    /** Number of lines for which {@link #verySlowFunction(String)} has finished. */
    static final LongAdder out = new LongAdder();

    /** Sum of the lengths of all strings passed to {@link #verySlowFunction(String)}. */
    static final LongAdder count = new LongAdder();

    /** Source of the random delay used by {@link #verySlowFunction(String)}. */
    static final Random rnd = new Random();

    /**
     * Matches one or more whitespace characters.
     *
     * <p>The escape must be written with backslashes; a pattern built from yen signs
     * (U+00A5) only matches literal yen characters. {@code +} is used rather than
     * {@code *} because {@code \s*} also matches the empty string, which would make
     * {@code replaceAll(" ")} insert a space between every pair of characters.
     */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Number of worker threads in the pool that runs the stream pipeline. */
    private static final int PARALLELISM = 16;

    /** Initial delay and period, in seconds, of the progress log. */
    private static final long LOG_INTERVAL_SECONDS = 3;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the XZ-compressed input file, e.g.
     *     {@code mecab-ipadic-neologd-master/seed/neologd-adjective-exp-dict-seed.20151126.csv.xz}
     * @throws IllegalArgumentException if no input path is given
     * @throws Exception if the processing task fails or the wait for it is interrupted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: java -jar sample.jar <input.csv.xz>");
        }
        File list = new File(args[0]);

        ScheduledExecutorService svc = Executors.newScheduledThreadPool(1);
        try {
            svc.scheduleAtFixedRate(
                    SampleMain::logProgress,
                    LOG_INTERVAL_SECONDS,
                    LOG_INTERVAL_SECONDS,
                    TimeUnit.SECONDS);

            ForkJoinPool executor = new ForkJoinPool(PARALLELISM);
            try {
                // Starting the pipeline from inside a ForkJoinPool task makes the parallel
                // stream run on this pool's workers rather than on the common pool.
                // NOTE(review): this relies on JDK implementation behavior, not on a
                // documented guarantee of the Stream API.
                executor.submit(() -> processFile(list)).get();
            } finally {
                executor.shutdown();
            }
        } finally {
            // The scheduler thread is non-daemon; without this the JVM would not exit.
            svc.shutdown();
        }
    }

    /** Logs the current counters together with the JVM's total and free heap size. */
    private static void logProgress() {
        Runtime runtime = Runtime.getRuntime();
        logger.info("In: {}, Out: {}, Count: {}, Total: {}, Free: {}", in, out, count, runtime.totalMemory(), runtime.freeMemory());
    }

    /**
     * Decompresses {@code file}, normalizes whitespace in every line and feeds each
     * line to {@link #verySlowFunction(String)} using a parallel, unordered stream.
     *
     * @throws UncheckedIOException if the file cannot be opened, decoded or read
     */
    private static void processFile(File file) {
        // Each layer is its own resource so the file handle is released even when a
        // wrapping constructor (e.g. XZInputStream reading the header) throws.
        try (InputStream raw = Files.newInputStream(file.toPath());
                InputStream xzin = new XZInputStream(raw);
                BufferedReader reader = new BufferedReader(new InputStreamReader(xzin, "UTF-8"))) {
            reader.lines()
                    .parallel()
                    .unordered()
                    .map(SampleMain::normalizeWhitespace)
                    .forEach(SampleMain::verySlowFunction);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /** Counts the line as read and collapses each run of whitespace into a single space. */
    private static String normalizeWhitespace(String line) {
        in.increment();
        return WHITESPACE.matcher(line).replaceAll(" ");
    }

    /**
     * Simulates an expensive consumer: adds the length of {@code value} to
     * {@link #count}, sleeps for a random 0-99 ms, then increments {@link #out}.
     *
     * @param value the line to "process"; must not be {@code null}
     */
    public static void verySlowFunction(String value) {
        count.add(value.length());
        try {
            TimeUnit.MILLISECONDS.sleep(rnd.nextInt(100));
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller instead of silently dropping it.
            Thread.currentThread().interrupt();
        }
        out.increment();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment