-
-
Save twillouer/e4180390dc503fe886d0 to your computer and use it in GitHub Desktop.
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Fork(3) | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.NANOSECONDS) | |
@State(Scope.Benchmark) | |
public class ReplaceAllBench { | |
@Param({ "10", "100", "1000" }) | |
private int size; | |
String string; | |
@Setup | |
public void setup() throws Throwable | |
{ | |
string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n " | |
+ randomAlphanumericString(size / 3); | |
} | |
static class Replacer { | |
private static final Pattern COMPILE = Pattern.compile("[\n\r]+ "); | |
static String unfold_original(String s) | |
{ | |
s = s.replaceAll("\n\r ", ""); | |
s = s.replaceAll("\r\n ", ""); | |
s = s.replaceAll("\n ", ""); | |
s = s.replaceAll("\r ", ""); | |
return s; | |
} | |
static String unfold_regexp(String s) | |
{ | |
s = s.replaceAll("\n\r |\r\n |\n |\r ", ""); | |
return s; | |
} | |
static String unfold_regexpcompiled(String s) | |
{ | |
s = COMPILE.matcher(s).replaceAll(""); | |
return s; | |
} | |
private enum NLStatus { | |
NONE, RC, NL, RC_NL, NL_RC; | |
} | |
/** | |
* Remove all the '\n' and ’\r' followed by a ' ' from a LDIF String. | |
* | |
* @param s The String to unfold | |
* @return The resulting String | |
*/ | |
protected static String unfold_optim_emmanuel_1(String s) | |
{ | |
int pos = 0; | |
char[] unfold = new char[s.length()]; | |
NLStatus newLine = NLStatus.NONE; | |
for (char c : s.toCharArray()) { | |
switch (c) { | |
case '\n': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.NL; | |
break; | |
case RC: | |
newLine = NLStatus.RC_NL; | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NL; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NL; | |
break; | |
} | |
break; | |
case '\r': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.RC; | |
break; | |
case NL: | |
newLine = NLStatus.NL_RC; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.RC; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.RC; | |
break; | |
} | |
break; | |
case ' ': | |
if (newLine == NLStatus.NONE) { | |
unfold[pos++] = c; | |
} else { | |
newLine = NLStatus.NONE; | |
} | |
break; | |
default: | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
} | |
unfold[pos++] = c; | |
} | |
} | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
break; | |
} | |
return new String(unfold, 0, pos); | |
} | |
private static final String[] TODO = { "\n\r ", "\r\n ", "\r ", "\n " }; | |
private static final String[] TO = { "", "", "", "" }; | |
protected static String unfold_with_stringutils_on_apache_common(final String string) | |
{ | |
return StringUtils.replaceEach(string, TODO, TO); | |
} | |
public static String unfold_olivier(String test) | |
{ | |
// Null -> null | |
if (test == null) { | |
return null; | |
} | |
// 0 or 1 char | |
if (test.length() < 2) | |
return test; | |
// 2 chars | |
if (test.length() == 2) { | |
if (test.charAt(1) == ' ') { | |
char c0 = test.charAt(0); | |
if (c0 == '\r' || c0 == '\n') { | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
char[] chars = test.toCharArray(); | |
char[] dest = new char[chars.length]; | |
int p = chars.length - 1; | |
int d = p; | |
while (p >= 2) { | |
char c = chars[p]; | |
// Not a space : keep as is | |
if (c != ' ') { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else { | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if (c1 == '\r' || c1 == '\n') { | |
p--; | |
char c2 = chars[p - 1]; | |
if ((c2 == '\r' || c2 == '\n') && c2 != c1) { | |
p--; | |
} | |
p--; | |
} | |
// It was just a space : keep it | |
else { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while (p >= 0) { | |
dest[d--] = chars[p--]; | |
} | |
return new String(dest, d + 1, chars.length - d - 1); | |
} | |
public static String unfold_olivier_optimise_par_emmanuel(String test) | |
{ | |
// Null -> null | |
if ( test == null ) | |
{ | |
return null; | |
} | |
char[] chars = test.toCharArray(); | |
// 0 or 1 char | |
if ( chars.length < 2 ) | |
{ | |
return test; | |
} | |
// 2 chars | |
if ( chars.length == 2 ) | |
{ | |
if ( chars[1] == ' ' ) | |
{ | |
if ( chars[0] == '\r' || chars[0] == '\n' ) | |
{ | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
int p = chars.length - 1; | |
int d = p; | |
while ( p >= 0 ) | |
{ | |
char c = chars[p]; | |
// Not a space : keep as is | |
if ( c != ' ' ) | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else | |
{ | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if ( c1 == '\r' ) | |
{ | |
if ( ( chars[p - 2] == '\n' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
else if ( c1 == '\n' ) | |
{ | |
if ( ( chars[p - 2] == '\r' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
// It was just a space : keep it | |
else | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while ( p >= 0 ) | |
{ | |
chars[d--] = chars[p--]; | |
} | |
return new String( chars, d + 1, chars.length - d - 1 ); | |
} | |
} | |
@Benchmark | |
public String unfold_original() | |
{ | |
return Replacer.unfold_original(string); | |
} | |
@Benchmark | |
public String unfold_regexp() | |
{ | |
return Replacer.unfold_regexp(string); | |
} | |
@Benchmark | |
public String unfold_regexpcompiled() | |
{ | |
return Replacer.unfold_regexpcompiled(string); | |
} | |
@Benchmark | |
public String unfold_optim_emmanuel_1() | |
{ | |
return Replacer.unfold_optim_emmanuel_1(string); | |
} | |
@Benchmark | |
public String unfold_with_stringutils_on_apache_common() | |
{ | |
return Replacer.unfold_with_stringutils_on_apache_common(string); | |
} | |
@Benchmark | |
public String unfold_unfold_olivier() | |
{ | |
return Replacer.unfold_olivier(string); | |
} | |
@Benchmark | |
public String unfold_olivier_optimise_par_emmanuel() | |
{ | |
return Replacer.unfold_olivier_optimise_par_emmanuel(string); | |
} | |
public static void main(String[] args) throws RunnerException, IOException | |
{ | |
Options opt = new OptionsBuilder().include(".*" + ReplaceAllBench.class.getSimpleName() + ".*") | |
.warmupIterations(20) | |
.warmupTime(TimeValue.seconds(1)) | |
.measurementIterations(20) | |
.timeUnit(TimeUnit.NANOSECONDS) | |
.forks(1) | |
// .addProfiler(LinuxPerfAsmProfiler.class) | |
.build(); | |
new Runner(opt).run(); | |
} | |
} |
En fait, pas sûr que ce soit plus rapide. Le s.toCharArray() créé un nouveay char[] et copie l'original dedans. Le gain éventuel est sur la suppression du s.charAt(i), avec élimination des deux tests sur les frontières.
Version un peu abusée niveau lisibilité, mais qui selon mes tests est la plus rapide:
static String unfold_cedric_ultimate(String s) {
if (s == null || s.length() < 2) {
return s;
}
int len = s.length();
char p1 = 'x';
char p2 = 'x';
char[] chars = s.toCharArray();
int wrtAt = 0;
for (int i = 0; i < len; i++) {
char c = chars[i];
try {
if (' ' == c) {
if ('\n' == p1) {
if ('\r' == p2) {
wrtAt--;
}
wrtAt--;
continue;
}
if ('\r' == p1) {
if ('\n' == p2) {
wrtAt--;
}
wrtAt--;
continue;
}
}
chars[wrtAt++] = c;
} finally {
p2 = p1;
p1 = c;
}
}
return new String(chars, 0, wrtAt);
}
Bon, là on est pas trop mal...
static String unfold_cedric_ultimate(String s) {
if (s == null || s.length() < 2) {
return s;
}
char p1 = 'x';
char p2 = 'x';
char[] chars = s.toCharArray();
int wrtAt = 0;
for (char c : chars) {
chars[wrtAt++] = c;
if (' ' == c) {
if ('\n' == p1) {
if ('\r' == p2) {
wrtAt--;
}
wrtAt--;
wrtAt--;
}
if ('\r' == p1) {
if ('\n' == p2) {
wrtAt--;
}
wrtAt--;
wrtAt--;
}
}
p2 = p1;
p1 = c;
}
return new String(chars, 0, wrtAt);
}
After running the full benchmark:
The following methods are deemed to be correct:
Reflection.unfold_cedric_ultimate2
Reflection.unfold_cedric
Reflection.unfold_cedric_improved
Reflection.unfold_regexp
Reflection.unfold_cedric_ultimate
Reflection.unfold_common
The following methods are proved to be incorrect:
Reflection.unfold_regexpcompiled
Reflection.unfold
Reflection.unfold3
Reflection.unfold2
Reflection.unfold_olivier
# Run complete. Total time: 00:26:35
Benchmark (size) Mode Samples Score Error Units
r.ReplaceAllBenchmark.unfold_all_regexp 10 thrpt 20 1571.746 ± 29.372 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 100 thrpt 20 345.773 ± 2.164 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 1000 thrpt 20 39.093 ± 0.554 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 10 thrpt 20 25168.509 ± 525.919 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 100 thrpt 20 7124.064 ± 223.885 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 1000 thrpt 20 814.544 ± 16.060 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 10 thrpt 20 33391.135 ± 395.195 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 100 thrpt 20 9804.861 ± 104.523 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 1000 thrpt 20 906.952 ± 12.452 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 10 thrpt 20 35065.155 ± 696.067 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 100 thrpt 20 9382.235 ± 138.609 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 1000 thrpt 20 750.416 ± 15.975 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 10 thrpt 20 31528.472 ± 183.737 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 100 thrpt 20 9378.629 ± 143.134 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 1000 thrpt 20 862.118 ± 18.347 ops/ms
r.ReplaceAllBenchmark.unfold_compiled_regexp 10 thrpt 20 4522.387 ± 120.559 ops/ms
r.ReplaceAllBenchmark.unfold_compiled_regexp 100 thrpt 20 813.303 ± 14.269 ops/ms
r.ReplaceAllBenchmark.unfold_compiled_regexp 1000 thrpt 20 100.297 ± 2.482 ops/ms
r.ReplaceAllBenchmark.unfold_original 10 thrpt 20 1137.678 ± 11.718 ops/ms
r.ReplaceAllBenchmark.unfold_original 100 thrpt 20 420.988 ± 15.005 ops/ms
r.ReplaceAllBenchmark.unfold_original 1000 thrpt 20 54.039 ± 1.534 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 10 thrpt 20 6582.421 ± 232.638 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 100 thrpt 20 1803.557 ± 61.737 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 1000 thrpt 20 113.271 ± 1.129 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_olivier 10 thrpt 20 21048.183 ± 509.316 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_olivier 100 thrpt 20 6366.979 ± 81.390 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_olivier 1000 thrpt 20 578.769 ± 10.877 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 10 thrpt 20 34515.031 ± 344.551 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 100 thrpt 20 7551.517 ± 53.829 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 1000 thrpt 20 673.183 ± 11.679 ops/ms
r.ReplaceAllBenchmark.unfold_very_complicated 10 thrpt 20 13886.697 ± 366.905 ops/ms
r.ReplaceAllBenchmark.unfold_very_complicated 100 thrpt 20 3173.519 ± 153.616 ops/ms
r.ReplaceAllBenchmark.unfold_very_complicated 1000 thrpt 20 285.992 ± 15.024 ops/ms
En ne gardant que les algos corrects:
# Run complete. Total time: 00:14:30
Benchmark (size) Mode Samples Score Error Units
r.ReplaceAllBenchmark.unfold_all_regexp 10 thrpt 20 1467.862 ± 28.884 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 100 thrpt 20 342.911 ± 4.772 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 1000 thrpt 20 39.269 ± 0.936 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 10 thrpt 20 24729.376 ± 935.766 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 100 thrpt 20 7074.242 ± 53.409 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 1000 thrpt 20 809.072 ± 16.939 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 10 thrpt 20 36381.822 ± 263.571 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 100 thrpt 20 9391.023 ± 60.022 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 1000 thrpt 20 720.513 ± 5.490 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 10 thrpt 20 36553.386 ± 367.102 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 100 thrpt 20 9565.954 ± 142.565 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 1000 thrpt 20 877.977 ± 11.356 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 10 thrpt 20 32364.978 ± 706.792 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 100 thrpt 20 9668.940 ± 72.017 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 1000 thrpt 20 932.645 ± 5.214 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 10 thrpt 20 6869.541 ± 133.914 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 100 thrpt 20 1739.391 ± 18.654 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 1000 thrpt 20 133.172 ± 2.303 ops/ms
Hey, mon algo était correct normalement ^^;
Quel est le problème ? Quels sont les cas qui ne passent pas ?
C'est pas moi qui ait dit qu'il était faux, c'est la machine. Sur des cas concrets :) Tu peux regarder ce que fait le main
pour trouver des exemples qui plantent. De mémoire le tien se plantait sur des séquences du genre \r\r (mais j'ai un doute, ils ont tous des pbs différents :)).
Hmmm, effectivement, un cas n'était pas géré. J'en ai profité pour essayer d'améliorer un peu l'algo. Tu peux le tester ?
public static String unfold_olivier2(String test) {
// Throws NPE if null, like the original method
if (test.length() < 2) return test;
char[] chars = test.toCharArray();
int p = chars.length - 1;
int d = p;
char c, c1, c2;
while (p > 0) {
c = chars[p];
c1 = chars[p - 1];
if (c == ' ' && (c1 == '\n' || c1 == '\r')) {
p--;
if (p > 0) {
c2 = chars[p - 1];
if ((c2 == '\n' || c2 == '\r') && c2 != c1) {
p--;
}
}
}
else {
chars[d] = c;
d--;
}
p--;
}
while (p >= 0) {
chars[d--] = chars[p--];
}
return new String(chars, d + 1, chars.length - d - 1);
}
Une version améliorée de la propale de Cédric, où on réutilise le char array au lieu d'en créer un :