-
-
Save twillouer/e4180390dc503fe886d0 to your computer and use it in GitHub Desktop.
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) | |
@Fork(3) | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.NANOSECONDS) | |
@State(Scope.Benchmark) | |
public class ReplaceAllBench { | |
@Param({ "10", "100", "1000" }) | |
private int size; | |
String string; | |
@Setup | |
public void setup() throws Throwable | |
{ | |
string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n " | |
+ randomAlphanumericString(size / 3); | |
} | |
static class Replacer { | |
private static final Pattern COMPILE = Pattern.compile("[\n\r]+ "); | |
static String unfold_original(String s) | |
{ | |
s = s.replaceAll("\n\r ", ""); | |
s = s.replaceAll("\r\n ", ""); | |
s = s.replaceAll("\n ", ""); | |
s = s.replaceAll("\r ", ""); | |
return s; | |
} | |
static String unfold_regexp(String s) | |
{ | |
s = s.replaceAll("\n\r |\r\n |\n |\r ", ""); | |
return s; | |
} | |
static String unfold_regexpcompiled(String s) | |
{ | |
s = COMPILE.matcher(s).replaceAll(""); | |
return s; | |
} | |
private enum NLStatus { | |
NONE, RC, NL, RC_NL, NL_RC; | |
} | |
/** | |
* Remove all the '\n' and ’\r' followed by a ' ' from a LDIF String. | |
* | |
* @param s The String to unfold | |
* @return The resulting String | |
*/ | |
protected static String unfold_optim_emmanuel_1(String s) | |
{ | |
int pos = 0; | |
char[] unfold = new char[s.length()]; | |
NLStatus newLine = NLStatus.NONE; | |
for (char c : s.toCharArray()) { | |
switch (c) { | |
case '\n': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.NL; | |
break; | |
case RC: | |
newLine = NLStatus.RC_NL; | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NL; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NL; | |
break; | |
} | |
break; | |
case '\r': | |
switch (newLine) { | |
case NONE: | |
newLine = NLStatus.RC; | |
break; | |
case NL: | |
newLine = NLStatus.NL_RC; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.RC; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.RC; | |
break; | |
} | |
break; | |
case ' ': | |
if (newLine == NLStatus.NONE) { | |
unfold[pos++] = c; | |
} else { | |
newLine = NLStatus.NONE; | |
} | |
break; | |
default: | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
newLine = NLStatus.NONE; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
newLine = NLStatus.NONE; | |
break; | |
} | |
unfold[pos++] = c; | |
} | |
} | |
switch (newLine) { | |
case NONE: | |
break; | |
case NL: | |
unfold[pos++] = '\n'; | |
break; | |
case RC: | |
unfold[pos++] = '\r'; | |
break; | |
case NL_RC: | |
unfold[pos++] = '\n'; | |
unfold[pos++] = '\r'; | |
break; | |
case RC_NL: | |
unfold[pos++] = '\r'; | |
unfold[pos++] = '\n'; | |
break; | |
} | |
return new String(unfold, 0, pos); | |
} | |
private static final String[] TODO = { "\n\r ", "\r\n ", "\r ", "\n " }; | |
private static final String[] TO = { "", "", "", "" }; | |
protected static String unfold_with_stringutils_on_apache_common(final String string) | |
{ | |
return StringUtils.replaceEach(string, TODO, TO); | |
} | |
public static String unfold_olivier(String test) | |
{ | |
// Null -> null | |
if (test == null) { | |
return null; | |
} | |
// 0 or 1 char | |
if (test.length() < 2) | |
return test; | |
// 2 chars | |
if (test.length() == 2) { | |
if (test.charAt(1) == ' ') { | |
char c0 = test.charAt(0); | |
if (c0 == '\r' || c0 == '\n') { | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
char[] chars = test.toCharArray(); | |
char[] dest = new char[chars.length]; | |
int p = chars.length - 1; | |
int d = p; | |
while (p >= 2) { | |
char c = chars[p]; | |
// Not a space : keep as is | |
if (c != ' ') { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else { | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if (c1 == '\r' || c1 == '\n') { | |
p--; | |
char c2 = chars[p - 1]; | |
if ((c2 == '\r' || c2 == '\n') && c2 != c1) { | |
p--; | |
} | |
p--; | |
} | |
// It was just a space : keep it | |
else { | |
dest[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while (p >= 0) { | |
dest[d--] = chars[p--]; | |
} | |
return new String(dest, d + 1, chars.length - d - 1); | |
} | |
public static String unfold_olivier_optimise_par_emmanuel(String test) | |
{ | |
// Null -> null | |
if ( test == null ) | |
{ | |
return null; | |
} | |
char[] chars = test.toCharArray(); | |
// 0 or 1 char | |
if ( chars.length < 2 ) | |
{ | |
return test; | |
} | |
// 2 chars | |
if ( chars.length == 2 ) | |
{ | |
if ( chars[1] == ' ' ) | |
{ | |
if ( chars[0] == '\r' || chars[0] == '\n' ) | |
{ | |
return ""; | |
} | |
} | |
return test; | |
} | |
// More than 2 chars | |
int p = chars.length - 1; | |
int d = p; | |
while ( p >= 0 ) | |
{ | |
char c = chars[p]; | |
// Not a space : keep as is | |
if ( c != ' ' ) | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
// Space | |
else | |
{ | |
char c1 = chars[p - 1]; | |
// Previous char is special : investigate deeper | |
if ( c1 == '\r' ) | |
{ | |
if ( ( chars[p - 2] == '\n' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
else if ( c1 == '\n' ) | |
{ | |
if ( ( chars[p - 2] == '\r' ) ) | |
{ | |
p -= 3; | |
} | |
else | |
{ | |
p -= 2; | |
} | |
} | |
// It was just a space : keep it | |
else | |
{ | |
chars[d] = c; | |
p--; | |
d--; | |
} | |
} | |
} | |
// Keep the remaining chars as it (special cases already covered) | |
while ( p >= 0 ) | |
{ | |
chars[d--] = chars[p--]; | |
} | |
return new String( chars, d + 1, chars.length - d - 1 ); | |
} | |
} | |
@Benchmark | |
public String unfold_original() | |
{ | |
return Replacer.unfold_original(string); | |
} | |
@Benchmark | |
public String unfold_regexp() | |
{ | |
return Replacer.unfold_regexp(string); | |
} | |
@Benchmark | |
public String unfold_regexpcompiled() | |
{ | |
return Replacer.unfold_regexpcompiled(string); | |
} | |
@Benchmark | |
public String unfold_optim_emmanuel_1() | |
{ | |
return Replacer.unfold_optim_emmanuel_1(string); | |
} | |
@Benchmark | |
public String unfold_with_stringutils_on_apache_common() | |
{ | |
return Replacer.unfold_with_stringutils_on_apache_common(string); | |
} | |
@Benchmark | |
public String unfold_unfold_olivier() | |
{ | |
return Replacer.unfold_olivier(string); | |
} | |
@Benchmark | |
public String unfold_olivier_optimise_par_emmanuel() | |
{ | |
return Replacer.unfold_olivier_optimise_par_emmanuel(string); | |
} | |
public static void main(String[] args) throws RunnerException, IOException | |
{ | |
Options opt = new OptionsBuilder().include(".*" + ReplaceAllBench.class.getSimpleName() + ".*") | |
.warmupIterations(20) | |
.warmupTime(TimeValue.seconds(1)) | |
.measurementIterations(20) | |
.timeUnit(TimeUnit.NANOSECONDS) | |
.forks(1) | |
// .addProfiler(LinuxPerfAsmProfiler.class) | |
.build(); | |
new Runner(opt).run(); | |
} | |
} |
J'ai une version modifiée de l'algo d'olivier, mais je suis un peu surpris : il est 20 % plus lent, sans que j'arrive à expliquer pourquoi. Les modifs que j'y ai apportées sont :
- transformation de la String en charArray dès le début
- non-création d'un char array de résultat, utilisation du charArray précédent sur place
- simplification des tests (deux if au lieu d'un or)
- utilisation d'opération entières sur p (p -= 2/3) au lieu de multiples décréments.
Voici le code modifié
public static String unfold3( String test )
{
// Null -> null
if ( test == null )
{
return null;
}
char[] chars = test.toCharArray();
// 0 or 1 char
if ( chars.length < 2 )
{
return test;
}
// 2 chars
if ( chars.length == 2 )
{
if ( chars[1] == ' ' )
{
if ( chars[0] == '\r' || chars[0] == '\n' )
{
return "";
}
}
return test;
}
// More than 2 chars
int p = chars.length - 1;
int d = p;
while ( p >= 0 )
{
char c = chars[p];
// Not a space : keep as is
if ( c != ' ' )
{
chars[d] = c;
p--;
d--;
}
// Space
else
{
char c1 = chars[p - 1];
// Previous char is special : investigate deeper
if ( c1 == '\r' )
{
if ( ( chars[p - 2] == '\n' ) )
{
p -= 3;
}
else
{
p -= 2;
}
}
else if ( c1 == '\n' )
{
if ( ( chars[p - 2] == '\r' ) )
{
p -= 3;
}
else
{
p -= 2;
}
}
// It was just a space : keep it
else
{
chars[d] = c;
p--;
d--;
}
}
}
// Keep the remaining chars as it (special cases already covered)
while ( p >= 0 )
{
chars[d--] = chars[p--];
}
return new String( chars, d + 1, chars.length - d - 1 );
}
Je n'ai aucune idée de la raison pour laquelle j'ai cette perte en performance, mon hypothèse est que la création d'un autre char[] pour le résultat permet à la JVM d'optimiser la création de la String au final.
Attention, les tailles ont changé, elles sont 3 fois plus petites que sur le précédent bench (d'où la différence). D'après ce que tu indiquais, il faut maintenant regarder les lignes "100" pour refléter mieux ton cas d'usage :
Benchmark (size) Mode Cnt Score Error Units
ReplaceAllBench.unfold_all_regexp 10 avgt 15 4169,150 ± 5484,852 ns/op
ReplaceAllBench.unfold_all_regexp 100 avgt 15 4409,728 ± 884,481 ns/op
ReplaceAllBench.unfold_all_regexp 1000 avgt 15 32718,512 ± 3087,518 ns/op
ReplaceAllBench.unfold_compiled_regexp 10 avgt 15 427,047 ± 174,845 ns/op
ReplaceAllBench.unfold_compiled_regexp 100 avgt 15 1390,680 ± 298,333 ns/op
ReplaceAllBench.unfold_compiled_regexp 1000 avgt 15 8400,555 ± 1978,106 ns/op
ReplaceAllBench.unfold_original 10 avgt 15 1209,530 ± 67,974 ns/op
ReplaceAllBench.unfold_original 100 avgt 15 4724,720 ± 1395,322 ns/op
ReplaceAllBench.unfold_original 1000 avgt 15 35329,550 ± 7713,905 ns/op
ReplaceAllBench.unfold_unfold_common 10 avgt 15 291,642 ± 73,267 ns/op
ReplaceAllBench.unfold_unfold_common 100 avgt 15 779,961 ± 179,683 ns/op
ReplaceAllBench.unfold_unfold_common 1000 avgt 15 6111,415 ± 1340,031 ns/op
ReplaceAllBench.unfold_unfold_olivier 10 avgt 15 65,822 ± 2,460 ns/op
ReplaceAllBench.unfold_unfold_olivier 100 avgt 15 230,292 ± 11,081 ns/op
ReplaceAllBench.unfold_unfold_olivier 1000 avgt 15 2191,172 ± 90,903 ns/op
ReplaceAllBench.unfold_unfold_optim_olivier 10 avgt 15 50,816 ± 2,996 ns/op
ReplaceAllBench.unfold_unfold_optim_olivier 100 avgt 15 190,641 ± 11,317 ns/op
ReplaceAllBench.unfold_unfold_optim_olivier 1000 avgt 15 1949,342 ± 76,311 ns/op
ReplaceAllBench.unfold_very_complicated 10 avgt 15 134,907 ± 27,827 ns/op
ReplaceAllBench.unfold_very_complicated 100 avgt 15 1550,192 ± 2567,458 ns/op
ReplaceAllBench.unfold_very_complicated 1000 avgt 15 3155,884 ± 144,358 ns/op
Java 8 ?
Cette version est un chouilla plus lente sur longueur = 10, mais plus rapide sur les chaînes plus grandes. Par ailleurs elle gère les \n\r consécutifs (unfold3
par ex est buggy):
static String unfold_cedric(String s) {
if (s == null || s.length() < 2) {
return s;
}
int len = s.length();
char p1 = 'x';
char p2 = 'x';
char[] sb = new char[len];
int wrtAt = 0;
for (int i = 0; i < len; i++) {
char c = s.charAt(i);
if (' '==c) {
if ('\n' == p1) {
if ('\r' == p2) {
wrtAt--;
}
wrtAt--;
} else if ('\r' == p1) {
if ('\n' == p2) {
wrtAt--;
}
wrtAt--;
} else {
sb[wrtAt++] = c;
}
} else {
sb[wrtAt++] = c;
}
p2 = p1;
p1 = c;
}
return new String(sb, 0, wrtAt);
}
J'obtiens ça:
Benchmark (size) Mode Samples Score Error Units
r.ReplaceAllBenchmark.unfold_cedric 10 thrpt 20 25715.261 ± 196.721 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 100 thrpt 20 8262.439 ± 143.142 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 1000 thrpt 20 792.683 ± 6.258 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 10 thrpt 20 26383.311 ± 803.548 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 100 thrpt 20 7203.692 ± 80.246 ops/ms
Pour ref, le bench complet:
package replace;
import org.apache.commons.lang3.StringUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(3)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class ReplaceAllBenchmark {
@Param({"10", "100", "1000"})
private int size;
String string;
private final static Random RANDOM = new Random();
private static String randomAlphanumericString(int len) {
StringBuilder sb = new StringBuilder(len);
for (int i = 0; i < len; i++) {
int r = RANDOM.nextInt(10);
switch (r) {
case 0:
sb.append('\r');
break;
case 1:
sb.append('\n');
break;
case 2:
sb.append(' ');
break;
default:
sb.append((char) ('a' + RANDOM.nextInt(26)));
}
}
return sb.toString();
}
@Setup
public void setup() throws Throwable {
string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n "
+ randomAlphanumericString(size / 3);
}
static class Replacer {
private static final Pattern COMPILE = Pattern.compile("[\n\r]+ ");
static String unfold2(String s) {
s = s.replaceAll("\n\r ", "");
s = s.replaceAll("\r\n ", "");
s = s.replaceAll("\n ", "");
s = s.replaceAll("\r ", "");
return s;
}
static String unfold_regexp(String s) {
s = s.replaceAll("\n\r |\r\n |\n |\r ", "");
return s;
}
static String unfold_regexpcompiled(String s) {
s = COMPILE.matcher(s).replaceAll("");
return s;
}
static String unfold_cedric(String s) {
if (s == null || s.length() < 2) {
return s;
}
int len = s.length();
char p1 = 'x';
char p2 = 'x';
char[] sb = new char[len];
int wrtAt = 0;
for (int i = 0; i < len; i++) {
char c = s.charAt(i);
if (' '==c) {
if ('\n' == p1) {
if ('\r' == p2) {
wrtAt--;
}
wrtAt--;
} else if ('\r' == p1) {
if ('\n' == p2) {
wrtAt--;
}
wrtAt--;
} else {
sb[wrtAt++] = c;
}
} else {
sb[wrtAt++] = c;
}
p2 = p1;
p1 = c;
}
return new String(sb, 0, wrtAt);
}
private enum NLStatus {
NONE, RC, NL, RC_NL, NL_RC;
}
/**
* Remove all the '\n' and ’\r' followed by a ' ' from a LDIF String.
*
* @param s The String to unfold
* @return The resulting String
*/
protected static String unfold(String s) {
int pos = 0;
char[] unfold = new char[s.length()];
NLStatus newLine = NLStatus.NONE;
for (char c : s.toCharArray()) {
switch (c) {
case '\n':
switch (newLine) {
case NONE:
newLine = NLStatus.NL;
break;
case RC:
newLine = NLStatus.RC_NL;
break;
case NL:
unfold[pos++] = '\n';
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
newLine = NLStatus.NL;
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
newLine = NLStatus.NL;
break;
}
break;
case '\r':
switch (newLine) {
case NONE:
newLine = NLStatus.RC;
break;
case NL:
newLine = NLStatus.NL_RC;
break;
case RC:
unfold[pos++] = '\r';
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
newLine = NLStatus.RC;
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
newLine = NLStatus.RC;
break;
}
break;
case ' ':
if (newLine == NLStatus.NONE) {
unfold[pos++] = c;
} else {
newLine = NLStatus.NONE;
}
break;
default:
switch (newLine) {
case NONE:
break;
case NL:
unfold[pos++] = '\n';
newLine = NLStatus.NONE;
break;
case RC:
unfold[pos++] = '\r';
newLine = NLStatus.NONE;
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
newLine = NLStatus.NONE;
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
newLine = NLStatus.NONE;
break;
}
unfold[pos++] = c;
}
}
switch (newLine) {
case NONE:
break;
case NL:
unfold[pos++] = '\n';
break;
case RC:
unfold[pos++] = '\r';
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
break;
}
return new String(unfold, 0, pos);
}
private static final String[] TODO = {"\n\r ", "\r\n ", "\r ", "\n "};
private static final String[] TO = {"", "", "", ""};
protected static String unfold_common(final String string) {
return StringUtils.replaceEach(string, TODO, TO);
}
public static String unfold_olivier(String test) {
// Null -> null
if (test == null) {
return null;
}
// 0 or 1 char
if (test.length() < 2)
return test;
// 2 chars
if (test.length() == 2) {
if (test.charAt(1) == ' ') {
char c0 = test.charAt(0);
if (c0 == '\r' || c0 == '\n') {
return "";
}
}
return test;
}
// More than 2 chars
char[] chars = test.toCharArray();
char[] dest = new char[chars.length];
int p = chars.length - 1;
int d = p;
while (p >= 2) {
char c = chars[p];
// Not a space : keep as is
if (c != ' ') {
dest[d] = c;
p--;
d--;
}
// Space
else {
char c1 = chars[p - 1];
// Previous char is special : investigate deeper
if (c1 == '\r' || c1 == '\n') {
p--;
char c2 = chars[p - 1];
if ((c2 == '\r' || c2 == '\n') && c2 != c1) {
p--;
}
p--;
}
// It was just a space : keep it
else {
dest[d] = c;
p--;
d--;
}
}
}
// Keep the remaining chars as it (special cases already covered)
while (p >= 0) {
dest[d--] = chars[p--];
}
return new String(dest, d + 1, chars.length - d - 1);
}
public static String unfold3(String test) {
// Null -> null
if (test == null) {
return null;
}
char[] chars = test.toCharArray();
// 0 or 1 char
if (chars.length < 2) {
return test;
}
// 2 chars
if (chars.length == 2) {
if (chars[1] == ' ') {
if (chars[0] == '\r' || chars[0] == '\n') {
return "";
}
}
return test;
}
// More than 2 chars
int p = chars.length - 1;
int d = p;
while (p >= 0) {
char c = chars[p];
// Not a space : keep as is
if (c != ' ') {
chars[d] = c;
p--;
d--;
}
// Space
else {
char c1 = chars[p - 1];
// Previous char is special : investigate deeper
if (c1 == '\r') {
if ((chars[p - 2] == '\n')) {
p -= 3;
} else {
p -= 2;
}
} else if (c1 == '\n') {
if ((chars[p - 2] == '\r')) {
p -= 3;
} else {
p -= 2;
}
}
// It was just a space : keep it
else {
chars[d] = c;
p--;
d--;
}
}
}
// Keep the remaining chars as it (special cases already covered)
while (p >= 0) {
chars[d--] = chars[p--];
}
return new String(chars, d + 1, chars.length - d - 1);
}
}
//@Benchmark
public String unfold_original() {
return Replacer.unfold2(string);
}
//@Benchmark
public String unfold_all_regexp() {
return Replacer.unfold_regexp(string);
}
//@Benchmark
public String unfold_compiled_regexp() {
return Replacer.unfold_regexpcompiled(string);
}
//@Benchmark
public String unfold_very_complicated() {
return Replacer.unfold(string);
}
//@Benchmark
public String unfold_unfold_common() {
return Replacer.unfold_common(string);
}
//@Benchmark
public String unfold_unfold_olivier() {
return Replacer.unfold_olivier(string);
}
@Benchmark
public String unfold_unfold_optim_olivier() {
return Replacer.unfold3(string);
}
@Benchmark
public String unfold_cedric() {
return Replacer.unfold_cedric(string);
}
public static void main(String[] args) {
for (int i = 0; i < 1000; i++) {
String str = randomAlphanumericString(20) + "\n\r " + randomAlphanumericString(30) + "\n "
+ randomAlphanumericString(30);
String orig = str.replace('\n', 'N').replace('\r', 'R').replace(' ', '_');
String ref = Replacer.unfold_regexp(str).replace('\n', 'N').replace('\r', 'R').replace(' ', '_');
String cmp = Replacer.unfold_cedric(str).replace('\n', 'N').replace('\r', 'R').replace(' ', '_');
boolean equals = ref.equals(cmp);
if (!equals) {
System.err.println(orig);
System.err.println(ref);
System.err.println(cmp);
System.err.println("---------------------------");
}
}
}
}
Une version améliorée de la propale de Cédric, où on réutilise le char array au lieu d'en créer un :
static String unfold_cedric_improved( String s )
{
if ( s == null || s.length() < 2 )
{
return s;
}
int len = s.length();
char p1 = 'x';
char p2 = 'x';
char[] chars = s.toCharArray();
int wrtAt = 0;
for ( int i = 0; i < len; i++ )
{
char c = chars[i];
if ( ' ' == c )
{
if ( '\n' == p1 )
{
if ( '\r' == p2 )
{
wrtAt--;
}
wrtAt--;
}
else if ( '\r' == p1 )
{
if ( '\n' == p2 )
{
wrtAt--;
}
wrtAt--;
}
else
{
chars[wrtAt++] = c;
}
}
else
{
chars[wrtAt++] = c;
}
p2 = p1;
p1 = c;
}
return new String( chars, 0, wrtAt );
}
En fait, pas sûr que ce soit plus rapide. Le s.toCharArray() créé un nouveay char[] et copie l'original dedans. Le gain éventuel est sur la suppression du s.charAt(i), avec élimination des deux tests sur les frontières.
Version un peu abusée niveau lisibilité, mais qui selon mes tests est la plus rapide:
static String unfold_cedric_ultimate(String s) {
if (s == null || s.length() < 2) {
return s;
}
int len = s.length();
char p1 = 'x';
char p2 = 'x';
char[] chars = s.toCharArray();
int wrtAt = 0;
for (int i = 0; i < len; i++) {
char c = chars[i];
try {
if (' ' == c) {
if ('\n' == p1) {
if ('\r' == p2) {
wrtAt--;
}
wrtAt--;
continue;
}
if ('\r' == p1) {
if ('\n' == p2) {
wrtAt--;
}
wrtAt--;
continue;
}
}
chars[wrtAt++] = c;
} finally {
p2 = p1;
p1 = c;
}
}
return new String(chars, 0, wrtAt);
}
Bon, là on est pas trop mal...
static String unfold_cedric_ultimate(String s) {
if (s == null || s.length() < 2) {
return s;
}
char p1 = 'x';
char p2 = 'x';
char[] chars = s.toCharArray();
int wrtAt = 0;
for (char c : chars) {
chars[wrtAt++] = c;
if (' ' == c) {
if ('\n' == p1) {
if ('\r' == p2) {
wrtAt--;
}
wrtAt--;
wrtAt--;
}
if ('\r' == p1) {
if ('\n' == p2) {
wrtAt--;
}
wrtAt--;
wrtAt--;
}
}
p2 = p1;
p1 = c;
}
return new String(chars, 0, wrtAt);
}
After running the full benchmark:
The following methods are deemed to be correct:
Reflection.unfold_cedric_ultimate2
Reflection.unfold_cedric
Reflection.unfold_cedric_improved
Reflection.unfold_regexp
Reflection.unfold_cedric_ultimate
Reflection.unfold_common
The following methods are proved to be incorrect:
Reflection.unfold_regexpcompiled
Reflection.unfold
Reflection.unfold3
Reflection.unfold2
Reflection.unfold_olivier
# Run complete. Total time: 00:26:35
Benchmark (size) Mode Samples Score Error Units
r.ReplaceAllBenchmark.unfold_all_regexp 10 thrpt 20 1571.746 ± 29.372 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 100 thrpt 20 345.773 ± 2.164 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 1000 thrpt 20 39.093 ± 0.554 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 10 thrpt 20 25168.509 ± 525.919 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 100 thrpt 20 7124.064 ± 223.885 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 1000 thrpt 20 814.544 ± 16.060 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 10 thrpt 20 33391.135 ± 395.195 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 100 thrpt 20 9804.861 ± 104.523 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 1000 thrpt 20 906.952 ± 12.452 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 10 thrpt 20 35065.155 ± 696.067 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 100 thrpt 20 9382.235 ± 138.609 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 1000 thrpt 20 750.416 ± 15.975 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 10 thrpt 20 31528.472 ± 183.737 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 100 thrpt 20 9378.629 ± 143.134 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 1000 thrpt 20 862.118 ± 18.347 ops/ms
r.ReplaceAllBenchmark.unfold_compiled_regexp 10 thrpt 20 4522.387 ± 120.559 ops/ms
r.ReplaceAllBenchmark.unfold_compiled_regexp 100 thrpt 20 813.303 ± 14.269 ops/ms
r.ReplaceAllBenchmark.unfold_compiled_regexp 1000 thrpt 20 100.297 ± 2.482 ops/ms
r.ReplaceAllBenchmark.unfold_original 10 thrpt 20 1137.678 ± 11.718 ops/ms
r.ReplaceAllBenchmark.unfold_original 100 thrpt 20 420.988 ± 15.005 ops/ms
r.ReplaceAllBenchmark.unfold_original 1000 thrpt 20 54.039 ± 1.534 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 10 thrpt 20 6582.421 ± 232.638 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 100 thrpt 20 1803.557 ± 61.737 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 1000 thrpt 20 113.271 ± 1.129 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_olivier 10 thrpt 20 21048.183 ± 509.316 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_olivier 100 thrpt 20 6366.979 ± 81.390 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_olivier 1000 thrpt 20 578.769 ± 10.877 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 10 thrpt 20 34515.031 ± 344.551 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 100 thrpt 20 7551.517 ± 53.829 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_optim_olivier 1000 thrpt 20 673.183 ± 11.679 ops/ms
r.ReplaceAllBenchmark.unfold_very_complicated 10 thrpt 20 13886.697 ± 366.905 ops/ms
r.ReplaceAllBenchmark.unfold_very_complicated 100 thrpt 20 3173.519 ± 153.616 ops/ms
r.ReplaceAllBenchmark.unfold_very_complicated 1000 thrpt 20 285.992 ± 15.024 ops/ms
En ne gardant que les algos corrects:
# Run complete. Total time: 00:14:30
Benchmark (size) Mode Samples Score Error Units
r.ReplaceAllBenchmark.unfold_all_regexp 10 thrpt 20 1467.862 ± 28.884 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 100 thrpt 20 342.911 ± 4.772 ops/ms
r.ReplaceAllBenchmark.unfold_all_regexp 1000 thrpt 20 39.269 ± 0.936 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 10 thrpt 20 24729.376 ± 935.766 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 100 thrpt 20 7074.242 ± 53.409 ops/ms
r.ReplaceAllBenchmark.unfold_cedric 1000 thrpt 20 809.072 ± 16.939 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 10 thrpt 20 36381.822 ± 263.571 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 100 thrpt 20 9391.023 ± 60.022 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_improved 1000 thrpt 20 720.513 ± 5.490 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 10 thrpt 20 36553.386 ± 367.102 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 100 thrpt 20 9565.954 ± 142.565 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate 1000 thrpt 20 877.977 ± 11.356 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 10 thrpt 20 32364.978 ± 706.792 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 100 thrpt 20 9668.940 ± 72.017 ops/ms
r.ReplaceAllBenchmark.unfold_cedric_ultimate2 1000 thrpt 20 932.645 ± 5.214 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 10 thrpt 20 6869.541 ± 133.914 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 100 thrpt 20 1739.391 ± 18.654 ops/ms
r.ReplaceAllBenchmark.unfold_unfold_common 1000 thrpt 20 133.172 ± 2.303 ops/ms
Hey, mon algo était correct normalement ^^;
Quel est le problème ? Quels sont les cas qui ne passent pas ?
C'est pas moi qui ait dit qu'il était faux, c'est la machine. Sur des cas concrets :) Tu peux regarder ce que fait le main
pour trouver des exemples qui plantent. De mémoire le tien se plantait sur des séquences du genre \r\r (mais j'ai un doute, ils ont tous des pbs différents :)).
Hmmm, effectivement, un cas n'était pas géré. J'en ai profité pour essayer d'améliorer un peu l'algo. Tu peux le tester ?
public static String unfold_olivier2(String test) {
// Throws NPE if null, like the original method
if (test.length() < 2) return test;
char[] chars = test.toCharArray();
int p = chars.length - 1;
int d = p;
char c, c1, c2;
while (p > 0) {
c = chars[p];
c1 = chars[p - 1];
if (c == ' ' && (c1 == '\n' || c1 == '\r')) {
p--;
if (p > 0) {
c2 = chars[p - 1];
if ((c2 == '\n' || c2 == '\r') && c2 != c1) {
p--;
}
}
}
else {
chars[d] = c;
d--;
}
p--;
}
while (p >= 0) {
chars[d--] = chars[p--];
}
return new String(chars, d + 1, chars.length - d - 1);
}
Results: