Skip to content

Instantly share code, notes, and snippets.

@twillouer
Last active August 29, 2015 14:17
Show Gist options
  • Save twillouer/e4180390dc503fe886d0 to your computer and use it in GitHub Desktop.
Save twillouer/e4180390dc503fe886d0 to your computer and use it in GitHub Desktop.
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(3)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class ReplaceAllBench {
@Param({ "10", "100", "1000" })
private int size;
String string;
@Setup
public void setup() throws Throwable
{
string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n "
+ randomAlphanumericString(size / 3);
}
static class Replacer {
private static final Pattern COMPILE = Pattern.compile("[\n\r]+ ");
static String unfold_original(String s)
{
s = s.replaceAll("\n\r ", "");
s = s.replaceAll("\r\n ", "");
s = s.replaceAll("\n ", "");
s = s.replaceAll("\r ", "");
return s;
}
static String unfold_regexp(String s)
{
s = s.replaceAll("\n\r |\r\n |\n |\r ", "");
return s;
}
static String unfold_regexpcompiled(String s)
{
s = COMPILE.matcher(s).replaceAll("");
return s;
}
private enum NLStatus {
NONE, RC, NL, RC_NL, NL_RC;
}
/**
* Remove all the '\n' and ’\r' followed by a ' ' from a LDIF String.
*
* @param s The String to unfold
* @return The resulting String
*/
protected static String unfold_optim_emmanuel_1(String s)
{
int pos = 0;
char[] unfold = new char[s.length()];
NLStatus newLine = NLStatus.NONE;
for (char c : s.toCharArray()) {
switch (c) {
case '\n':
switch (newLine) {
case NONE:
newLine = NLStatus.NL;
break;
case RC:
newLine = NLStatus.RC_NL;
break;
case NL:
unfold[pos++] = '\n';
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
newLine = NLStatus.NL;
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
newLine = NLStatus.NL;
break;
}
break;
case '\r':
switch (newLine) {
case NONE:
newLine = NLStatus.RC;
break;
case NL:
newLine = NLStatus.NL_RC;
break;
case RC:
unfold[pos++] = '\r';
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
newLine = NLStatus.RC;
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
newLine = NLStatus.RC;
break;
}
break;
case ' ':
if (newLine == NLStatus.NONE) {
unfold[pos++] = c;
} else {
newLine = NLStatus.NONE;
}
break;
default:
switch (newLine) {
case NONE:
break;
case NL:
unfold[pos++] = '\n';
newLine = NLStatus.NONE;
break;
case RC:
unfold[pos++] = '\r';
newLine = NLStatus.NONE;
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
newLine = NLStatus.NONE;
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
newLine = NLStatus.NONE;
break;
}
unfold[pos++] = c;
}
}
switch (newLine) {
case NONE:
break;
case NL:
unfold[pos++] = '\n';
break;
case RC:
unfold[pos++] = '\r';
break;
case NL_RC:
unfold[pos++] = '\n';
unfold[pos++] = '\r';
break;
case RC_NL:
unfold[pos++] = '\r';
unfold[pos++] = '\n';
break;
}
return new String(unfold, 0, pos);
}
private static final String[] TODO = { "\n\r ", "\r\n ", "\r ", "\n " };
private static final String[] TO = { "", "", "", "" };
protected static String unfold_with_stringutils_on_apache_common(final String string)
{
return StringUtils.replaceEach(string, TODO, TO);
}
public static String unfold_olivier(String test)
{
// Null -> null
if (test == null) {
return null;
}
// 0 or 1 char
if (test.length() < 2)
return test;
// 2 chars
if (test.length() == 2) {
if (test.charAt(1) == ' ') {
char c0 = test.charAt(0);
if (c0 == '\r' || c0 == '\n') {
return "";
}
}
return test;
}
// More than 2 chars
char[] chars = test.toCharArray();
char[] dest = new char[chars.length];
int p = chars.length - 1;
int d = p;
while (p >= 2) {
char c = chars[p];
// Not a space : keep as is
if (c != ' ') {
dest[d] = c;
p--;
d--;
}
// Space
else {
char c1 = chars[p - 1];
// Previous char is special : investigate deeper
if (c1 == '\r' || c1 == '\n') {
p--;
char c2 = chars[p - 1];
if ((c2 == '\r' || c2 == '\n') && c2 != c1) {
p--;
}
p--;
}
// It was just a space : keep it
else {
dest[d] = c;
p--;
d--;
}
}
}
// Keep the remaining chars as it (special cases already covered)
while (p >= 0) {
dest[d--] = chars[p--];
}
return new String(dest, d + 1, chars.length - d - 1);
}
public static String unfold_olivier_optimise_par_emmanuel(String test)
{
// Null -> null
if ( test == null )
{
return null;
}
char[] chars = test.toCharArray();
// 0 or 1 char
if ( chars.length < 2 )
{
return test;
}
// 2 chars
if ( chars.length == 2 )
{
if ( chars[1] == ' ' )
{
if ( chars[0] == '\r' || chars[0] == '\n' )
{
return "";
}
}
return test;
}
// More than 2 chars
int p = chars.length - 1;
int d = p;
while ( p >= 0 )
{
char c = chars[p];
// Not a space : keep as is
if ( c != ' ' )
{
chars[d] = c;
p--;
d--;
}
// Space
else
{
char c1 = chars[p - 1];
// Previous char is special : investigate deeper
if ( c1 == '\r' )
{
if ( ( chars[p - 2] == '\n' ) )
{
p -= 3;
}
else
{
p -= 2;
}
}
else if ( c1 == '\n' )
{
if ( ( chars[p - 2] == '\r' ) )
{
p -= 3;
}
else
{
p -= 2;
}
}
// It was just a space : keep it
else
{
chars[d] = c;
p--;
d--;
}
}
}
// Keep the remaining chars as it (special cases already covered)
while ( p >= 0 )
{
chars[d--] = chars[p--];
}
return new String( chars, d + 1, chars.length - d - 1 );
}
}
@Benchmark
public String unfold_original()
{
return Replacer.unfold_original(string);
}
@Benchmark
public String unfold_regexp()
{
return Replacer.unfold_regexp(string);
}
@Benchmark
public String unfold_regexpcompiled()
{
return Replacer.unfold_regexpcompiled(string);
}
@Benchmark
public String unfold_optim_emmanuel_1()
{
return Replacer.unfold_optim_emmanuel_1(string);
}
@Benchmark
public String unfold_with_stringutils_on_apache_common()
{
return Replacer.unfold_with_stringutils_on_apache_common(string);
}
@Benchmark
public String unfold_unfold_olivier()
{
return Replacer.unfold_olivier(string);
}
@Benchmark
public String unfold_olivier_optimise_par_emmanuel()
{
return Replacer.unfold_olivier_optimise_par_emmanuel(string);
}
public static void main(String[] args) throws RunnerException, IOException
{
Options opt = new OptionsBuilder().include(".*" + ReplaceAllBench.class.getSimpleName() + ".*")
.warmupIterations(20)
.warmupTime(TimeValue.seconds(1))
.measurementIterations(20)
.timeUnit(TimeUnit.NANOSECONDS)
.forks(1)
// .addProfiler(LinuxPerfAsmProfiler.class)
.build();
new Runner(opt).run();
}
}
@OlivierCroisier
Copy link

Hmmm, effectivement, un cas n'était pas géré. J'en ai profité pour essayer d'améliorer un peu l'algo. Tu peux le tester ?

public static String unfold_olivier2(String test) {

    // Throws NPE if null, like the original method
    if (test.length() < 2) return test;

    char[] chars = test.toCharArray();
    int p = chars.length - 1;
    int d = p;
    char c, c1, c2;
    while (p > 0) {
        c = chars[p];
        c1 = chars[p - 1];
        if (c == ' ' && (c1 == '\n' || c1 == '\r')) {
            p--;
            if (p > 0) {
                c2 = chars[p - 1];
                if ((c2 == '\n' || c2 == '\r') && c2 != c1) {
                    p--;
                }
            }
        }
        else {
            chars[d] = c;
            d--;
        }
        p--;
    }
    while (p >= 0) {
        chars[d--] = chars[p--];
    }

    return new String(chars, d + 1, chars.length - d - 1);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment