Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Regexp performance difference example
package cg;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Micro-benchmark that compares a single regular expression using alternation
 * ({@code [0123]123456|98765}) with two separate expressions, one per alternative,
 * that are run one after the other over the same input.
 *
 * <p>The input is the file {@code test.txt} in the working directory. It holds the decimal
 * representations of the numbers 0 to 13,999,999 written back to back without separators,
 * and is generated on the first run if it does not exist yet.
 *
 * @author Sergey Mashkov (cy6erGn0m)
 * @since 03.03.13
 */
public class JavaRegexpTest {
    /** How many consecutive numbers, starting at 0, are written to the input file. */
    private static final long NUMBER_COUNT = 14000000L;

    /** Number of untimed-in-spirit rounds executed before the rounds announced by "final". */
    private static final int WARMUP_ROUNDS = 100;

    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;

    // Patterns are immutable, so they are compiled once instead of on every benchmark call.
    private static final Pattern COMBINED = Pattern.compile("[0123]123456|98765", REGEX_FLAGS);
    private static final Pattern FIRST_ALTERNATIVE = Pattern.compile("[0123]123456", REGEX_FLAGS);
    // NOTE(review): this pattern is compiled without REGEX_FLAGS, unlike the two above.
    // Kept as in the original benchmark; confirm whether the asymmetry is intended.
    private static final Pattern SECOND_ALTERNATIVE = Pattern.compile("98765");

    /**
     * Creates the benchmark input: the numbers {@code 0 .. NUMBER_COUNT - 1} in decimal,
     * concatenated with no separator and encoded as ISO-8859-1.
     *
     * @param p file to create or overwrite
     * @throws IOException if the file cannot be opened or written
     */
    private static void generateFile(Path p) throws IOException {
        try (Writer w = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(p.toFile()), "ISO-8859-1"), 65536)) {
            for (long i = 0; i < NUMBER_COUNT; ++i) {
                w.write(Long.toString(i));
            }
        }
    }

    /**
     * Generates the input file if needed, loads it into memory and runs both benchmark
     * variants {@code WARMUP_ROUNDS} times, then once more after printing {@code final}.
     *
     * @param args ignored
     * @throws Exception if the input file cannot be generated or read
     */
    public static void main(String[] args) throws Exception {
        Path test = Paths.get(".", "test.txt");
        if (Files.notExists(test)) {
            generateFile(test);
        }

        // readAllBytes reads the complete file or fails; no manual size cast or read loop needed.
        byte[] buffer = Files.readAllBytes(test);
        CharSequence text = new DummyCharSequence(buffer);

        for (int i = 0; i < WARMUP_ROUNDS; ++i) {
            doTest1(text);
            doTest2(text);
        }
        System.out.println("final");
        doTest1(text);
        doTest2(text);
    }

    /**
     * Times the single-expression variant and prints {@code time1: N ms}.
     *
     * @param text input to scan
     */
    private static void doTest1(CharSequence text) {
        long start = System.nanoTime();
        int count = countMatches(COMBINED, text);
        // The time is reported only when something matched; this also makes the
        // match count an observable result of the scan.
        if (count > 0) {
            System.out.println("time1: " + elapsedMillis(start) + " ms");
        }
    }

    /**
     * Times the two-expression variant, then prints the combined match count
     * followed by {@code time2: N ms}.
     *
     * @param text input to scan
     */
    private static void doTest2(CharSequence text) {
        long start = System.nanoTime();
        int count = countMatches(FIRST_ALTERNATIVE, text) + countMatches(SECOND_ALTERNATIVE, text);
        System.out.println(count);
        System.out.println("time2: " + elapsedMillis(start) + " ms");
    }

    /** Counts how many times {@code Matcher.find()} succeeds for {@code pattern} over {@code text}. */
    private static int countMatches(Pattern pattern, CharSequence text) {
        Matcher m = pattern.matcher(text);
        int count = 0;
        while (m.find()) {
            ++count;
        }
        return count;
    }

    /** Returns the whole milliseconds elapsed since {@code startNanos}, a {@code System.nanoTime()} reading. */
    private static long elapsedMillis(long startNanos) {
        return (System.nanoTime() - startNanos) / 1000000L;
    }
}
/**
 * Minimal {@link CharSequence} over a byte array in which every byte is exposed as one
 * {@code char} with the same unsigned value (i.e. the bytes are read as ISO-8859-1).
 *
 * <p>The array passed to the constructor is stored as is, not copied.
 * {@link #subSequence(int, int)} is not supported.
 */
class DummyCharSequence implements CharSequence {
    private final byte[] array;

    /**
     * @param array bytes backing this sequence; the reference is kept without copying
     */
    DummyCharSequence(byte[] array) {
        this.array = array;
    }

    /** Returns the number of bytes in the backing array. */
    @Override
    public int length() {
        return array.length;
    }

    /**
     * Returns the byte at {@code index} as a char in the range U+0000..U+00FF.
     *
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside the backing array
     */
    @Override
    public char charAt(int index) {
        // Mask before widening: a plain (char) cast sign-extends bytes >= 0x80,
        // turning e.g. 0xE9 into U+FFE9 instead of U+00E9.
        return (char) (array[index] & 0xFF);
    }

    /**
     * Not supported by this minimal implementation.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        throw new UnsupportedOperationException();
    }

    /** Returns a string holding the same characters as this sequence, in the same order. */
    @Override
    public String toString() {
        char[] chars = new char[array.length];
        for (int i = 0; i < chars.length; i++) {
            chars[i] = (char) (array[i] & 0xFF);
        }
        return new String(chars);
    }
}
time1: 2706 ms
time2: 977 ms
time1: 2736 ms
time2: 964 ms
time1: 2745 ms
time2: 980 ms
time1: 2748 ms
time2: 968 ms
time1: 2747 ms
time2: 968 ms
time1: 2752 ms
time2: 967 ms
time1: 2739 ms
time2: 964 ms
time1: 2758 ms
time2: 970 ms
time1: 2779 ms
time2: 967 ms
time1: 2759 ms
time2: 971 ms
time1: 2803 ms
time2: 966 ms
time1: 2759 ms
time2: 968 ms
time1: 2743 ms
time2: 968 ms
time1: 2764 ms
time2: 969 ms
time1: 2751 ms
time2: 969 ms
time1: 2772 ms
time2: 967 ms
time1: 2778 ms
time2: 971 ms
time1: 2757 ms
time2: 969 ms
time1: 2772 ms
time2: 974 ms
time1: 2755 ms
time2: 968 ms
time1: 2760 ms
time2: 965 ms
time1: 2760 ms
time2: 975 ms
time1: 2756 ms
time2: 967 ms
time1: 2761 ms
time2: 967 ms
time1: 2779 ms
time2: 969 ms
time1: 2767 ms
time2: 968 ms
time1: 2758 ms
time2: 971 ms
time1: 2760 ms
time2: 965 ms
time1: 2768 ms
time2: 974 ms
time1: 2773 ms
time2: 969 ms
time1: 2776 ms
time2: 975 ms
time1: 2778 ms
time2: 969 ms
time1: 2771 ms
time2: 967 ms
time1: 2764 ms
time2: 967 ms
time1: 2805 ms
time2: 971 ms
time1: 2755 ms
time2: 972 ms
time1: 2765 ms
time2: 967 ms
time1: 2950 ms
time2: 968 ms
time1: 2760 ms
time2: 967 ms
time1: 2775 ms
time2: 970 ms
time1: 2771 ms
time2: 969 ms
time1: 2783 ms
time2: 974 ms
time1: 2764 ms
time2: 973 ms
time1: 2746 ms
time2: 966 ms
time1: 2756 ms
time2: 971 ms
time1: 2762 ms
time2: 969 ms
time1: 2748 ms
time2: 973 ms
time1: 2772 ms
time2: 971 ms
time1: 2776 ms
time2: 967 ms
time1: 2769 ms
time2: 970 ms
time1: 2754 ms
time2: 965 ms
time1: 2744 ms
time2: 967 ms
time1: 2760 ms
time2: 965 ms
time1: 2751 ms
time2: 966 ms
time1: 2757 ms
time2: 968 ms
time1: 2773 ms
time2: 974 ms
time1: 2767 ms
time2: 965 ms
time1: 2768 ms
time2: 966 ms
time1: 2753 ms
time2: 963 ms
time1: 2750 ms
time2: 963 ms
time1: 2770 ms
time2: 967 ms
time1: 2793 ms
time2: 967 ms
time1: 2767 ms
time2: 966 ms
time1: 2756 ms
time2: 968 ms
time1: 2907 ms
time2: 971 ms
time1: 2765 ms
time2: 966 ms
time1: 2767 ms
time2: 974 ms
time1: 2805 ms
time2: 971 ms
time1: 2762 ms
time2: 967 ms
time1: 2764 ms
time2: 963 ms
time1: 2754 ms
time2: 967 ms
time1: 2766 ms
time2: 966 ms
time1: 2764 ms
time2: 998 ms
time1: 2771 ms
time2: 964 ms
time1: 2761 ms
time2: 968 ms
time1: 2753 ms
time2: 966 ms
time1: 2766 ms
time2: 965 ms
time1: 2754 ms
time2: 964 ms
time1: 2755 ms
time2: 965 ms
time1: 2775 ms
time2: 966 ms
time1: 2769 ms
time2: 963 ms
time1: 2769 ms
time2: 965 ms
time1: 2753 ms
time2: 963 ms
time1: 2755 ms
time2: 961 ms
time1: 2755 ms
time2: 967 ms
time1: 2768 ms
time2: 967 ms
time1: 2760 ms
time2: 969 ms
time1: 2758 ms
time2: 966 ms
time1: 2754 ms
time2: 963 ms
time1: 2757 ms
time2: 963 ms
time1: 2774 ms
time2: 966 ms
time1: 2777 ms
time2: 969 ms
time1: 2762 ms
time2: 966 ms
time1: 2806 ms
time2: 967 ms
time1: 2775 ms
time2: 963 ms
time1: 2763 ms
time2: 968 ms
time1: 2967 ms
time2: 962 ms
time1: 2761 ms
time2: 964 ms
time1: 2761 ms
time2: 964 ms
time1: 2769 ms
time2: 962 ms
time1: 2758 ms
time2: 965 ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment