twillouer/ReplaceAll

## ReplaceAll
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Fork(3)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class ReplaceAllBench {

    @Param({ "10", "100", "1000" })
    private int size;

    String string;

    /**
     * Prepares the benchmark input: three chunks produced by
     * {@code randomAlphanumericString(size / 3)}, separated by the fold markers
     * "\n\r " and "\n ".
     */
    @Setup
    public void setup() throws Throwable
    {
        final int chunkLength = size / 3;

        string = randomAlphanumericString(chunkLength) + "\n\r "
                + randomAlphanumericString(chunkLength) + "\n "
                + randomAlphanumericString(chunkLength);
    }

    static class Replacer {
        /**
         * Precompiled fold-marker pattern: one line break ("\n\r", "\r\n", "\n" or "\r")
         * followed by a single space. This is the same expression unfold_regexp compiles on
         * every call, so both variants remove exactly the same markers; a character class
         * such as "[\n\r]+ " would also swallow the extra line breaks of "\n\n ".
         */
        private static final Pattern COMPILE = Pattern.compile("\n\r |\r\n |\n |\r ");

        /**
         * Baseline variant: removes the fold markers with four successive
         * {@link String#replaceAll(String, String)} passes, in the order
         * "\n\r ", "\r\n ", "\n ", "\r ".
         *
         * @param s the String to unfold
         * @return the String without fold markers
         */
        static String unfold_original(String s)
        {
            return s.replaceAll("\n\r ", "")
                    .replaceAll("\r\n ", "")
                    .replaceAll("\n ", "")
                    .replaceAll("\r ", "");
        }

        /**
         * Removes all fold markers in one {@link String#replaceAll(String, String)} pass,
         * using a single alternation of the four markers.
         *
         * @param s the String to unfold
         * @return the String without fold markers
         */
        static String unfold_regexp(String s)
        {
            final String unfolded = s.replaceAll("\n\r |\r\n |\n |\r ", "");

            return unfolded;
        }

        /**
         * Removes every match of the precompiled {@code COMPILE} pattern, so the
         * regular expression is not recompiled on each call.
         *
         * @param s the String to unfold
         * @return the String with all matches of {@code COMPILE} removed
         */
        static String unfold_regexpcompiled(String s)
        {
            final String unfolded = COMPILE.matcher(s).replaceAll("");

            return unfolded;
        }

        /**
         * Line-break characters that unfold_optim_emmanuel_1 has read but not yet
         * written to its output.
         */
        private enum NLStatus {
            /** Nothing is held back. */
            NONE,
            /** A single '\r' is held back. */
            RC,
            /** A single '\n' is held back. */
            NL,
            /** A '\r' followed by a '\n' is held back. */
            RC_NL,
            /** A '\n' followed by a '\r' is held back. */
            NL_RC
        }

        /**
         * Remove all the line breaks ('\n', '\r', "\r\n" or "\n\r") followed by a ' ' from a
         * LDIF String, in a single pass and without regular expressions.
         * <p>
         * Line-break characters are held back (tracked with a {@link NLStatus}) until the next
         * character shows whether they belong to a fold marker: a following ' ' drops them
         * together with the space, any other character writes them to the output first. At
         * most two different line-break characters are held back at a time, so the longest
         * marker removed is "\r\n " or "\n\r ".
         *
         * @param s The String to unfold, may be null
         * @return The resulting String, or null if s is null
         */
        protected static String unfold_optim_emmanuel_1(String s)
        {
            if (s == null) {
                return null;
            }

            int pos = 0;
            // Every output char comes from one input char, so the input length is enough
            char[] unfold = new char[s.length()];
            NLStatus newLine = NLStatus.NONE;

            for (char c : s.toCharArray()) {
                switch (c) {
                case '\n':
                    switch (newLine) {
                    case NONE:
                        newLine = NLStatus.NL;
                        break;

                    case RC:
                        newLine = NLStatus.RC_NL;
                        break;

                    case NL:
                        // "\n\n" : the older '\n' can no longer be part of a marker
                        unfold[pos++] = '\n';
                        break;

                    case RC_NL:
                        // "\r\n\n" : only the newest '\n' can still start a marker
                        unfold[pos++] = '\r';
                        unfold[pos++] = '\n';
                        newLine = NLStatus.NL;
                        break;

                    case NL_RC:
                        // "\n\r\n" : the trailing "\r\n" may still be a marker, so only
                        // the leading '\n' is written and "\r\n" stays held back
                        unfold[pos++] = '\n';
                        newLine = NLStatus.RC_NL;
                        break;
                    }

                    break;

                case '\r':
                    switch (newLine) {
                    case NONE:
                        newLine = NLStatus.RC;
                        break;

                    case NL:
                        newLine = NLStatus.NL_RC;
                        break;

                    case RC:
                        // "\r\r" : the older '\r' can no longer be part of a marker
                        unfold[pos++] = '\r';
                        break;

                    case RC_NL:
                        // "\r\n\r" : the trailing "\n\r" may still be a marker, so only
                        // the leading '\r' is written and "\n\r" stays held back
                        unfold[pos++] = '\r';
                        newLine = NLStatus.NL_RC;
                        break;

                    case NL_RC:
                        // "\n\r\r" : only the newest '\r' can still start a marker
                        unfold[pos++] = '\n';
                        unfold[pos++] = '\r';
                        newLine = NLStatus.RC;
                        break;
                    }

                    break;

                case ' ':
                    if (newLine == NLStatus.NONE) {
                        unfold[pos++] = c;
                    } else {
                        // Fold marker found : drop the held back line break and the space
                        newLine = NLStatus.NONE;
                    }

                    break;

                default:
                    // The held back line break was not a marker : keep it
                    pos = writeHeldBack(newLine, unfold, pos);
                    newLine = NLStatus.NONE;
                    unfold[pos++] = c;
                    break;
                }
            }

            // A line break at the very end of the input is kept as is
            pos = writeHeldBack(newLine, unfold, pos);

            return new String(unfold, 0, pos);
        }

        /**
         * Writes the line-break characters described by {@code heldBack} to {@code out}.
         *
         * @param heldBack The line-break characters currently held back
         * @param out The output buffer
         * @param pos The next free position in {@code out}
         * @return The next free position in {@code out} after writing
         */
        private static int writeHeldBack(NLStatus heldBack, char[] out, int pos)
        {
            switch (heldBack) {
            case NL:
                out[pos++] = '\n';
                break;

            case RC:
                out[pos++] = '\r';
                break;

            case NL_RC:
                out[pos++] = '\n';
                out[pos++] = '\r';
                break;

            case RC_NL:
                out[pos++] = '\r';
                out[pos++] = '\n';
                break;

            default:
                // NONE : nothing to write
                break;
            }

            return pos;
        }

        /** Fold markers searched for by unfold_with_stringutils_on_apache_common. */
        private static final String[] FOLD_MARKERS = { "\n\r ", "\r\n ", "\r ", "\n " };

        /** Replacement for each entry of FOLD_MARKERS: always the empty String. */
        private static final String[] EMPTY_REPLACEMENTS = { "", "", "", "" };

        /**
         * Unfolds with {@code StringUtils.replaceEach} (presumably Apache Commons Lang, as
         * the method name suggests), replacing every fold marker by the empty String.
         *
         * @param string the String to unfold
         * @return the result of {@code StringUtils.replaceEach}
         */
        protected static String unfold_with_stringutils_on_apache_common(final String string)
        {
            final String unfolded = StringUtils.replaceEach(string, FOLD_MARKERS, EMPTY_REPLACEMENTS);

            return unfolded;
        }

        /**
         * Removes every fold marker ("\n\r ", "\r\n ", "\n " or "\r ") from a String by
         * scanning it backwards and copying the kept characters to the end of a second
         * buffer.
         * <p>
         * A marker located at the very beginning of the String (for instance "\n abc") is
         * removed as well.
         *
         * @param test the String to unfold, may be null
         * @return the unfolded String, or null if {@code test} is null
         */
        public static String unfold_olivier(String test)
        {
            // Null -> null
            if (test == null) {
                return null;
            }

            // 0 or 1 char : too short to contain a marker
            if (test.length() < 2) {
                return test;
            }

            // 2 chars : either exactly one short marker, or nothing to remove
            if (test.length() == 2) {
                char c0 = test.charAt(0);
                boolean marker = test.charAt(1) == ' ' && (c0 == '\r' || c0 == '\n');
                return marker ? "" : test;
            }

            // More than 2 chars
            char[] chars = test.toCharArray();
            char[] dest = new char[chars.length];
            int p = chars.length - 1;
            int d = p;

            // Stop at index 1 : a space at index 0 has nothing before it
            while (p >= 1) {
                char c = chars[p];
                char c1 = chars[p - 1];

                if (c == ' ' && (c1 == '\r' || c1 == '\n')) {
                    // Marker : skip the space and the line break before it
                    p -= 2;

                    // Skip a second, different line-break char ("\r\n " / "\n\r ") if any
                    if (p >= 0) {
                        char c2 = chars[p];
                        if ((c2 == '\r' || c2 == '\n') && c2 != c1) {
                            p--;
                        }
                    }
                } else {
                    // Ordinary char, or a space that does not follow a line break
                    dest[d--] = c;
                    p--;
                }
            }

            // The first char cannot end a marker : keep it if it was not consumed
            if (p == 0) {
                dest[d--] = chars[0];
            }

            return new String(dest, d + 1, chars.length - d - 1);
        }

      /**
       * Removes every fold marker ("\n\r ", "\r\n ", "\n " or "\r ") from a String by
       * scanning its char array backwards and compacting the kept characters in place
       * towards the end of that same array.
       * <p>
       * Spaces at index 0 and markers at the very beginning of the String are handled
       * without reading before the start of the array.
       *
       * @param test the String to unfold, may be null
       * @return the unfolded String, or null if {@code test} is null
       */
      public static String unfold_olivier_optimise_par_emmanuel(String test)
      {
        // Null -> null
        if ( test == null )
        {
          return null;
        }

        char[] chars = test.toCharArray();

        // 0 or 1 char : too short to contain a marker
        if ( chars.length < 2 )
        {
          return test;
        }

        // 2 chars : either exactly one short marker, or nothing to remove
        if ( chars.length == 2 )
        {
          boolean marker = ( chars[1] == ' ' ) && ( ( chars[0] == '\r' ) || ( chars[0] == '\n' ) );

          return marker ? "" : test;
        }

        // More than 2 chars. The write index d never drops below the read index p, so
        // the chars still to be read are never overwritten.
        int p = chars.length - 1;
        int d = p;

        while ( p >= 0 )
        {
          char c = chars[p];

          // A space can only end a marker if at least one char precedes it
          if ( ( c == ' ' ) && ( p >= 1 ) )
          {
            char c1 = chars[p - 1];

            if ( ( c1 == '\r' ) || ( c1 == '\n' ) )
            {
              // A two-char line break needs the other line-break char just before
              char partner = ( c1 == '\r' ) ? '\n' : '\r';
              boolean twoCharBreak = ( p >= 2 ) && ( chars[p - 2] == partner );

              p -= twoCharBreak ? 3 : 2;
              continue;
            }
          }

          // Ordinary char, or a space that does not follow a line break : keep it
          chars[d--] = c;
          p--;
        }

        return new String( chars, d + 1, chars.length - d - 1 );
      }
    }

    /** Benchmarks {@link Replacer#unfold_original(String)} on the input built by setup(). */
    @Benchmark
    public String unfold_original()
    {
        final String unfolded = Replacer.unfold_original(string);

        return unfolded;
    }

    /** Benchmarks {@link Replacer#unfold_regexp(String)} on the input built by setup(). */
    @Benchmark
    public String unfold_regexp()
    {
        final String unfolded = Replacer.unfold_regexp(string);

        return unfolded;
    }

    /** Benchmarks {@link Replacer#unfold_regexpcompiled(String)} on the input built by setup(). */
    @Benchmark
    public String unfold_regexpcompiled()
    {
        final String unfolded = Replacer.unfold_regexpcompiled(string);

        return unfolded;
    }

    /** Benchmarks {@link Replacer#unfold_optim_emmanuel_1(String)} on the input built by setup(). */
    @Benchmark
    public String unfold_optim_emmanuel_1()
    {
        final String unfolded = Replacer.unfold_optim_emmanuel_1(string);

        return unfolded;
    }

    /**
     * Benchmarks {@link Replacer#unfold_with_stringutils_on_apache_common(String)} on the
     * input built by setup().
     */
    @Benchmark
    public String unfold_with_stringutils_on_apache_common()
    {
        final String unfolded = Replacer.unfold_with_stringutils_on_apache_common(string);

        return unfolded;
    }

    /** Benchmarks {@link Replacer#unfold_olivier(String)} on the input built by setup(). */
    @Benchmark
    public String unfold_unfold_olivier()
    {
        final String unfolded = Replacer.unfold_olivier(string);

        return unfolded;
    }

    /**
     * Benchmarks {@link Replacer#unfold_olivier_optimise_par_emmanuel(String)} on the input
     * built by setup().
     */
    @Benchmark
    public String unfold_olivier_optimise_par_emmanuel()
    {
        final String unfolded = Replacer.unfold_olivier_optimise_par_emmanuel(string);

        return unfolded;
    }

    /**
     * Launches the benchmarks of this class through the JMH {@code Runner}, with
     * 20 warmup iterations of one second, 20 measurement iterations, nanosecond
     * time unit and a single fork.
     *
     * @param args command line arguments, not used
     * @throws RunnerException declared for the {@code Runner} invocation
     * @throws IOException declared by the original signature
     */
    public static void main(String[] args) throws RunnerException, IOException
    {
        // Select every benchmark whose name contains this class' simple name
        final String benchmarkFilter = ".*" + ReplaceAllBench.class.getSimpleName() + ".*";

        final OptionsBuilder builder = new OptionsBuilder();
        // A profiler can be plugged in here: .addProfiler(LinuxPerfAsmProfiler.class)
        final Options opt = builder.include(benchmarkFilter)
                .warmupIterations(20)
                .warmupTime(TimeValue.seconds(1))
                .measurementIterations(20)
                .timeUnit(TimeUnit.NANOSECONDS)
                .forks(1)
                .build();

        final Runner runner = new Runner(opt);
        runner.run();
    }
}
	@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
	@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
	@Fork(3)
	@BenchmarkMode(Mode.Throughput)
	@OutputTimeUnit(TimeUnit.NANOSECONDS)
	@State(Scope.Benchmark)
	public class ReplaceAllBench {

	@Param({ "10", "100", "1000" })
	private int size;

	String string;

	@Setup
	public void setup() throws Throwable
	{
	string = randomAlphanumericString(size / 3) + "\n\r " + randomAlphanumericString(size / 3) + "\n "
	+ randomAlphanumericString(size / 3);
	}

	static class Replacer {
	private static final Pattern COMPILE = Pattern.compile("[\n\r]+ ");

	static String unfold_original(String s)
	{
	s = s.replaceAll("\n\r ", "");
	s = s.replaceAll("\r\n ", "");
	s = s.replaceAll("\n ", "");
	s = s.replaceAll("\r ", "");
	return s;
	}

	static String unfold_regexp(String s)
	{
	s = s.replaceAll("\n\r |\r\n |\n |\r ", "");
	return s;
	}

	static String unfold_regexpcompiled(String s)
	{
	s = COMPILE.matcher(s).replaceAll("");
	return s;
	}

	private enum NLStatus {
	NONE, RC, NL, RC_NL, NL_RC;
	}

	/**
	* Remove all the '\n' and '\r' followed by a ' ' from a LDIF String.
	*
	* @param s The String to unfold
	* @return The resulting String
	*/
	protected static String unfold_optim_emmanuel_1(String s)
	{
	int pos = 0;
	char[] unfold = new char[s.length()];
	NLStatus newLine = NLStatus.NONE;

	for (char c : s.toCharArray()) {
	switch (c) {
	case '\n':
	switch (newLine) {
	case NONE:
	newLine = NLStatus.NL;
	break;

	case RC:
	newLine = NLStatus.RC_NL;
	break;

	case NL:
	unfold[pos++] = '\n';
	break;

	case RC_NL:
	unfold[pos++] = '\r';
	unfold[pos++] = '\n';
	newLine = NLStatus.NL;
	break;

	case NL_RC:
	unfold[pos++] = '\n';
	unfold[pos++] = '\r';
	newLine = NLStatus.NL;
	break;
	}

	break;

	case '\r':
	switch (newLine) {
	case NONE:
	newLine = NLStatus.RC;
	break;

	case NL:
	newLine = NLStatus.NL_RC;
	break;

	case RC:
	unfold[pos++] = '\r';
	break;

	case RC_NL:
	unfold[pos++] = '\r';
	unfold[pos++] = '\n';
	newLine = NLStatus.RC;
	break;

	case NL_RC:
	unfold[pos++] = '\n';
	unfold[pos++] = '\r';
	newLine = NLStatus.RC;
	break;
	}

	break;

	case ' ':
	if (newLine == NLStatus.NONE) {
	unfold[pos++] = c;
	} else {
	newLine = NLStatus.NONE;
	}

	break;

	default:
	switch (newLine) {
	case NONE:
	break;

	case NL:
	unfold[pos++] = '\n';
	newLine = NLStatus.NONE;
	break;

	case RC:
	unfold[pos++] = '\r';
	newLine = NLStatus.NONE;
	break;

	case NL_RC:
	unfold[pos++] = '\n';
	unfold[pos++] = '\r';
	newLine = NLStatus.NONE;
	break;

	case RC_NL:
	unfold[pos++] = '\r';
	unfold[pos++] = '\n';
	newLine = NLStatus.NONE;
	break;
	}

	unfold[pos++] = c;
	}
	}

	switch (newLine) {
	case NONE:
	break;

	case NL:
	unfold[pos++] = '\n';
	break;

	case RC:
	unfold[pos++] = '\r';
	break;

	case NL_RC:
	unfold[pos++] = '\n';
	unfold[pos++] = '\r';
	break;

	case RC_NL:
	unfold[pos++] = '\r';
	unfold[pos++] = '\n';
	break;
	}

	return new String(unfold, 0, pos);
	}

	private static final String[] TODO = { "\n\r ", "\r\n ", "\r ", "\n " };

	private static final String[] TO = { "", "", "", "" };

	protected static String unfold_with_stringutils_on_apache_common(final String string)
	{
	return StringUtils.replaceEach(string, TODO, TO);
	}

	public static String unfold_olivier(String test)
	{

	// Null -> null
	if (test == null) {
	return null;
	}

	// 0 or 1 char
	if (test.length() < 2)
	return test;

	// 2 chars
	if (test.length() == 2) {
	if (test.charAt(1) == ' ') {
	char c0 = test.charAt(0);
	if (c0 == '\r' || c0 == '\n') {
	return "";
	}
	}
	return test;
	}

	// More than 2 chars
	char[] chars = test.toCharArray();
	char[] dest = new char[chars.length];
	int p = chars.length - 1;
	int d = p;
	while (p >= 2) {
	char c = chars[p];
	// Not a space : keep as is
	if (c != ' ') {
	dest[d] = c;
	p--;
	d--;
	}
	// Space
	else {
	char c1 = chars[p - 1];
	// Previous char is special : investigate deeper
	if (c1 == '\r' || c1 == '\n') {
	p--;
	char c2 = chars[p - 1];
	if ((c2 == '\r' || c2 == '\n') && c2 != c1) {
	p--;
	}
	p--;
	}
	// It was just a space : keep it
	else {
	dest[d] = c;
	p--;
	d--;
	}
	}
	}
	// Keep the remaining chars as it (special cases already covered)
	while (p >= 0) {
	dest[d--] = chars[p--];
	}

	return new String(dest, d + 1, chars.length - d - 1);
	}

	public static String unfold_olivier_optimise_par_emmanuel(String test)
	{
	// Null -> null
	if ( test == null )
	{
	return null;
	}

	char[] chars = test.toCharArray();

	// 0 or 1 char
	if ( chars.length < 2 )
	{
	return test;
	}

	// 2 chars
	if ( chars.length == 2 )
	{
	if ( chars[1] == ' ' )
	{
	if ( chars[0] == '\r' || chars[0] == '\n' )
	{
	return "";
	}
	}

	return test;
	}

	// More than 2 chars
	int p = chars.length - 1;
	int d = p;

	while ( p >= 0 )
	{
	char c = chars[p];
	// Not a space : keep as is
	if ( c != ' ' )
	{
	chars[d] = c;
	p--;
	d--;
	}
	// Space
	else
	{
	char c1 = chars[p - 1];
	// Previous char is special : investigate deeper
	if ( c1 == '\r' )
	{
	if ( ( chars[p - 2] == '\n' ) )
	{
	p -= 3;
	}
	else
	{
	p -= 2;
	}
	}
	else if ( c1 == '\n' )
	{
	if ( ( chars[p - 2] == '\r' ) )
	{
	p -= 3;
	}
	else
	{
	p -= 2;
	}
	}
	// It was just a space : keep it
	else
	{
	chars[d] = c;
	p--;
	d--;
	}
	}
	}
	// Keep the remaining chars as it (special cases already covered)
	while ( p >= 0 )
	{
	chars[d--] = chars[p--];
	}

	return new String( chars, d + 1, chars.length - d - 1 );
	}
	}

	@Benchmark
	public String unfold_original()
	{
	return Replacer.unfold_original(string);
	}

	@Benchmark
	public String unfold_regexp()
	{
	return Replacer.unfold_regexp(string);
	}

	@Benchmark
	public String unfold_regexpcompiled()
	{
	return Replacer.unfold_regexpcompiled(string);
	}

	@Benchmark
	public String unfold_optim_emmanuel_1()
	{
	return Replacer.unfold_optim_emmanuel_1(string);
	}

	@Benchmark
	public String unfold_with_stringutils_on_apache_common()
	{
	return Replacer.unfold_with_stringutils_on_apache_common(string);
	}

	@Benchmark
	public String unfold_unfold_olivier()
	{
	return Replacer.unfold_olivier(string);
	}

	@Benchmark
	public String unfold_olivier_optimise_par_emmanuel()
	{
	return Replacer.unfold_olivier_optimise_par_emmanuel(string);
	}

	public static void main(String[] args) throws RunnerException, IOException
	{
	Options opt = new OptionsBuilder().include(".*" + ReplaceAllBench.class.getSimpleName() + ".*")
	.warmupIterations(20)
	.warmupTime(TimeValue.seconds(1))
	.measurementIterations(20)
	.timeUnit(TimeUnit.NANOSECONDS)
	.forks(1)
	// .addProfiler(LinuxPerfAsmProfiler.class)
	.build();

	new Runner(opt).run();
	}
	}
Benchmark	(size) Mode	Cnt	Score	Error	Units
ReplaceAllBench.unfold_all_regexp	10 avgt	15	2300,867	± 702,534	ns/op
ReplaceAllBench.unfold_all_regexp	100 avgt	15	10303,298	± 202,785	ns/op
ReplaceAllBench.unfold_all_regexp	1000 avgt	15	94123,644	± 1649,926	ns/op
ReplaceAllBench.unfold_compiled_regexp	10 avgt	15	656,458	± 135,117	ns/op
ReplaceAllBench.unfold_compiled_regexp	100 avgt	15	3370,366	± 708,509	ns/op
ReplaceAllBench.unfold_compiled_regexp	1000 avgt	15	38239,589	± 11620,233	ns/op
ReplaceAllBench.unfold_original	10 avgt	15	1818,520	± 117,464	ns/op
ReplaceAllBench.unfold_original	100 avgt	15	10065,356	± 3338,643	ns/op
ReplaceAllBench.unfold_original	1000 avgt	15	82453,316	± 12327,339	ns/op
ReplaceAllBench.unfold_unfold_common	10 avgt	15	380,190	± 72,212	ns/op
ReplaceAllBench.unfold_unfold_common	100 avgt	15	1801,607	± 164,418	ns/op
ReplaceAllBench.unfold_unfold_common	1000 avgt	15	16738,866	± 1296,921	ns/op
ReplaceAllBench.unfold_very_complicated	10 avgt	15	213,194	± 25,204	ns/op
ReplaceAllBench.unfold_very_complicated	100 avgt	15	1102,179	± 114,323	ns/op
ReplaceAllBench.unfold_very_complicated	1000 avgt	15	9482,631	± 687,077	ns/op
ReplaceAllBench.unfold_unfold_olivier	10 avgt	15	98,947	± 7,024	ns/op
ReplaceAllBench.unfold_unfold_olivier	100 avgt	15	691,597	± 33,185	ns/op
ReplaceAllBench.unfold_unfold_olivier	1000 avgt	15	6269,420	± 407,698	ns/op