Created
March 3, 2013 19:09
-
-
Save cy6erGn0m/5077720 to your computer and use it in GitHub Desktop.
Regexp performance difference example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cg; | |
import java.io.*; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
/** | |
* @author Sergey Mashkov (cy6erGn0m) | |
* @since 03.03.13 | |
*/ | |
public class JavaRegexpTest { | |
private static void generateFile(Path p) throws IOException { | |
try (Writer w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(p.toFile()), "ISO-8859-1"), 65536)) { | |
for (long i = 0; i < 14000000L; ++i) { | |
w.write(Long.toString(i)); | |
} | |
} | |
} | |
public static void main(String[] args) throws Exception { | |
Path test = Paths.get(".", "test.txt"); | |
if (Files.notExists(test)) { | |
generateFile(test); | |
} | |
int size = (int)Files.size(test); | |
byte [] buffer = new byte[size]; | |
try (FileInputStream fis = new FileInputStream(test.toFile())) { | |
int rc; | |
int complete = 0; | |
while (complete < buffer.length && (rc = fis.read(buffer, complete, buffer.length - complete)) != -1) { | |
complete += rc; | |
} | |
if (complete < buffer.length) { | |
throw new IllegalStateException(); | |
} | |
} | |
CharSequence text = new DummyCharSequence(buffer); | |
for (int i = 0; i < 100; ++i) { | |
doTest1(text); | |
doTest2(text); | |
} | |
System.out.println("final"); | |
doTest1(text); | |
doTest2(text); | |
} | |
private static void doTest1(CharSequence text) { | |
long start = System.currentTimeMillis(); | |
Matcher m = Pattern.compile("[0123]123456|98765", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE).matcher(text); | |
int count = 0; | |
while (m.find()) { | |
++count; | |
} | |
// System.out.println(count); | |
if (count > 0) { | |
System.out.println("time1: " + (System.currentTimeMillis() - start) + " ms"); | |
} | |
} | |
private static void doTest2(CharSequence text) { | |
long start = System.currentTimeMillis(); | |
Matcher m = Pattern.compile("[0123]123456", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE).matcher(text); | |
int count = 0; | |
while (m.find()) { | |
++count; | |
} | |
m = Pattern.compile("98765").matcher(text); | |
while (m.find()) { | |
++count; | |
} | |
System.out.println(count); | |
System.out.println("time2: " + (System.currentTimeMillis() - start) + " ms"); | |
} | |
} | |
/**
 * Read-only {@link CharSequence} view over a byte array in which every byte is
 * exposed as the character with the same unsigned value (0-255), i.e. the
 * ISO-8859-1 interpretation of the bytes.
 *
 * <p>The array passed to the constructor is not copied, so later changes to it
 * are visible through this view.
 */
class DummyCharSequence implements CharSequence {

    private final byte[] array;

    /**
     * @param array bytes to expose as characters; must not be {@code null}
     * @throws NullPointerException if {@code array} is {@code null}
     */
    DummyCharSequence(byte[] array) {
        this.array = java.util.Objects.requireNonNull(array, "array");
    }

    /** Returns the number of bytes in the backing array. */
    @Override
    public int length() {
        return array.length;
    }

    /**
     * Returns the byte at {@code index} as a character in the range U+0000-U+00FF.
     *
     * @throws IndexOutOfBoundsException if {@code index} is outside the array
     */
    @Override
    public char charAt(int index) {
        // Mask before widening: a plain (char) cast sign-extends negative
        // bytes, turning e.g. 0xE9 into U+FFE9 instead of U+00E9.
        return (char) (array[index] & 0xFF);
    }

    /**
     * Returns a new sequence backed by a copy of the bytes in {@code [start, end)}.
     *
     * @throws IndexOutOfBoundsException if {@code start} or {@code end} is negative,
     *         {@code end} exceeds {@link #length()}, or {@code start > end}
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        if (start < 0 || end > array.length || start > end) {
            throw new IndexOutOfBoundsException(
                    "start=" + start + ", end=" + end + ", length=" + array.length);
        }
        return new DummyCharSequence(java.util.Arrays.copyOfRange(array, start, end));
    }

    /** Returns the characters of this sequence as a {@link String}, as the {@link CharSequence} contract requires. */
    @Override
    public String toString() {
        return new String(array, java.nio.charset.StandardCharsets.ISO_8859_1);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time1: 2706 ms | |
time2: 977 ms | |
time1: 2736 ms | |
time2: 964 ms | |
time1: 2745 ms | |
time2: 980 ms | |
time1: 2748 ms | |
time2: 968 ms | |
time1: 2747 ms | |
time2: 968 ms | |
time1: 2752 ms | |
time2: 967 ms | |
time1: 2739 ms | |
time2: 964 ms | |
time1: 2758 ms | |
time2: 970 ms | |
time1: 2779 ms | |
time2: 967 ms | |
time1: 2759 ms | |
time2: 971 ms | |
time1: 2803 ms | |
time2: 966 ms | |
time1: 2759 ms | |
time2: 968 ms | |
time1: 2743 ms | |
time2: 968 ms | |
time1: 2764 ms | |
time2: 969 ms | |
time1: 2751 ms | |
time2: 969 ms | |
time1: 2772 ms | |
time2: 967 ms | |
time1: 2778 ms | |
time2: 971 ms | |
time1: 2757 ms | |
time2: 969 ms | |
time1: 2772 ms | |
time2: 974 ms | |
time1: 2755 ms | |
time2: 968 ms | |
time1: 2760 ms | |
time2: 965 ms | |
time1: 2760 ms | |
time2: 975 ms | |
time1: 2756 ms | |
time2: 967 ms | |
time1: 2761 ms | |
time2: 967 ms | |
time1: 2779 ms | |
time2: 969 ms | |
time1: 2767 ms | |
time2: 968 ms | |
time1: 2758 ms | |
time2: 971 ms | |
time1: 2760 ms | |
time2: 965 ms | |
time1: 2768 ms | |
time2: 974 ms | |
time1: 2773 ms | |
time2: 969 ms | |
time1: 2776 ms | |
time2: 975 ms | |
time1: 2778 ms | |
time2: 969 ms | |
time1: 2771 ms | |
time2: 967 ms | |
time1: 2764 ms | |
time2: 967 ms | |
time1: 2805 ms | |
time2: 971 ms | |
time1: 2755 ms | |
time2: 972 ms | |
time1: 2765 ms | |
time2: 967 ms | |
time1: 2950 ms | |
time2: 968 ms | |
time1: 2760 ms | |
time2: 967 ms | |
time1: 2775 ms | |
time2: 970 ms | |
time1: 2771 ms | |
time2: 969 ms | |
time1: 2783 ms | |
time2: 974 ms | |
time1: 2764 ms | |
time2: 973 ms | |
time1: 2746 ms | |
time2: 966 ms | |
time1: 2756 ms | |
time2: 971 ms | |
time1: 2762 ms | |
time2: 969 ms | |
time1: 2748 ms | |
time2: 973 ms | |
time1: 2772 ms | |
time2: 971 ms | |
time1: 2776 ms | |
time2: 967 ms | |
time1: 2769 ms | |
time2: 970 ms | |
time1: 2754 ms | |
time2: 965 ms | |
time1: 2744 ms | |
time2: 967 ms | |
time1: 2760 ms | |
time2: 965 ms | |
time1: 2751 ms | |
time2: 966 ms | |
time1: 2757 ms | |
time2: 968 ms | |
time1: 2773 ms | |
time2: 974 ms | |
time1: 2767 ms | |
time2: 965 ms | |
time1: 2768 ms | |
time2: 966 ms | |
time1: 2753 ms | |
time2: 963 ms | |
time1: 2750 ms | |
time2: 963 ms | |
time1: 2770 ms | |
time2: 967 ms | |
time1: 2793 ms | |
time2: 967 ms | |
time1: 2767 ms | |
time2: 966 ms | |
time1: 2756 ms | |
time2: 968 ms | |
time1: 2907 ms | |
time2: 971 ms | |
time1: 2765 ms | |
time2: 966 ms | |
time1: 2767 ms | |
time2: 974 ms | |
time1: 2805 ms | |
time2: 971 ms | |
time1: 2762 ms | |
time2: 967 ms | |
time1: 2764 ms | |
time2: 963 ms | |
time1: 2754 ms | |
time2: 967 ms | |
time1: 2766 ms | |
time2: 966 ms | |
time1: 2764 ms | |
time2: 998 ms | |
time1: 2771 ms | |
time2: 964 ms | |
time1: 2761 ms | |
time2: 968 ms | |
time1: 2753 ms | |
time2: 966 ms | |
time1: 2766 ms | |
time2: 965 ms | |
time1: 2754 ms | |
time2: 964 ms | |
time1: 2755 ms | |
time2: 965 ms | |
time1: 2775 ms | |
time2: 966 ms | |
time1: 2769 ms | |
time2: 963 ms | |
time1: 2769 ms | |
time2: 965 ms | |
time1: 2753 ms | |
time2: 963 ms | |
time1: 2755 ms | |
time2: 961 ms | |
time1: 2755 ms | |
time2: 967 ms | |
time1: 2768 ms | |
time2: 967 ms | |
time1: 2760 ms | |
time2: 969 ms | |
time1: 2758 ms | |
time2: 966 ms | |
time1: 2754 ms | |
time2: 963 ms | |
time1: 2757 ms | |
time2: 963 ms | |
time1: 2774 ms | |
time2: 966 ms | |
time1: 2777 ms | |
time2: 969 ms | |
time1: 2762 ms | |
time2: 966 ms | |
time1: 2806 ms | |
time2: 967 ms | |
time1: 2775 ms | |
time2: 963 ms | |
time1: 2763 ms | |
time2: 968 ms | |
time1: 2967 ms | |
time2: 962 ms | |
time1: 2761 ms | |
time2: 964 ms | |
time1: 2761 ms | |
time2: 964 ms | |
time1: 2769 ms | |
time2: 962 ms | |
time1: 2758 ms | |
time2: 965 ms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment