Skip to content

Instantly share code, notes, and snippets.

@sandwwraith
Last active October 31, 2017 22:15
Show Gist options
  • Save sandwwraith/5744dd07fa08cbe47f92dfb41ec0e86b to your computer and use it in GitHub Desktop.
Save sandwwraith/5744dd07fa08cbe47f92dfb41ec0e86b to your computer and use it in GitHub Desktop.
/*
* Copyright (c) 2014, Oracle America, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of Oracle nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.sample;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.*;
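/**
 * JMH comparison of three Cyrillic-to-Latin transliteration implementations.
 *
 * <p>Results of one recorded run (average time per call, lower is faster); the rows are
 * labelled with the class name {@code MyBenchmark}:
 * <pre>
 * Benchmark                          Mode  Cnt   Score   Error  Units
 * MyBenchmark.testAllEnglishArray    avgt   20   0.141 ± 0.007  us/op
 * MyBenchmark.testAllEnglishMap      avgt   20   0.664 ± 0.011  us/op
 * MyBenchmark.testAllEnglishUB       avgt   20  10.780 ± 0.144  us/op
 * MyBenchmark.testAllRussiansArray   avgt   20   0.344 ± 0.005  us/op
 * MyBenchmark.testAllRussiansMap     avgt   20   0.823 ± 0.053  us/op
 * MyBenchmark.testAllRussiansUB      avgt   20  15.692 ± 0.586  us/op
 * </pre>
 */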
@Fork(value = 2, warmups = 2)
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public class UselessTranslitBenchmark {
/**
 * (Cyrillic letter, Latin replacement) pairs applied in order by
 * {@link #cyrillicToLatin(String)}. Lower-case letters come first, then upper-case;
 * both cases produce lower-case Latin output, and the soft and hard signs are dropped.
 */
private static final String[][] CYRILLIC_TO_LATIN = {
    {"ё", "yo"}, {"а", "a"}, {"б", "b"}, {"в", "v"}, {"г", "g"}, {"д", "d"},
    {"е", "e"}, {"ж", "zh"}, {"з", "z"}, {"и", "i"}, {"й", "y"}, {"к", "k"},
    {"л", "l"}, {"м", "m"}, {"н", "n"}, {"о", "o"}, {"п", "p"}, {"р", "r"},
    {"с", "s"}, {"т", "t"}, {"у", "u"}, {"ф", "f"}, {"х", "h"}, {"ц", "c"},
    {"ч", "ch"}, {"ш", "sh"}, {"щ", "shch"}, {"ь", ""}, {"ы", "y"}, {"ъ", ""},
    {"э", "e"}, {"ю", "yu"}, {"я", "ya"},
    {"Ё", "yo"}, {"А", "a"}, {"Б", "b"}, {"В", "v"}, {"Г", "g"}, {"Д", "d"},
    {"Е", "e"}, {"Ж", "zh"}, {"З", "z"}, {"И", "i"}, {"Й", "y"}, {"К", "k"},
    {"Л", "l"}, {"М", "m"}, {"Н", "n"}, {"О", "o"}, {"П", "p"}, {"Р", "r"},
    {"С", "s"}, {"Т", "t"}, {"У", "u"}, {"Ф", "f"}, {"Х", "h"}, {"Ц", "c"},
    {"Ч", "ch"}, {"Ш", "sh"}, {"Щ", "shch"}, {"Ь", ""}, {"Ы", "y"}, {"Ъ", ""},
    {"Э", "e"}, {"Ю", "yu"}, {"Я", "ya"},
};

/**
 * Transliterates Russian Cyrillic letters to lower-case Latin text by applying one
 * substitution per letter over the whole string.
 *
 * <p>Characters that are not in the table (Latin letters, digits, punctuation) are left
 * untouched, including their case.
 *
 * @param lat the text to transliterate; may be {@code null}
 * @return the transliterated text, or {@code null} if {@code lat} is {@code null}
 */
public static String cyrillicToLatin(String lat) {
    if (lat == null) {
        return null;
    }
    String result = lat;
    for (String[] pair : CYRILLIC_TO_LATIN) {
        // NOTE(review): replaceAll treats its first argument as a regex; String.replace
        // would do the same literal substitution more cheaply. It is kept because this
        // method appears to be the slow baseline the benchmarks compare against - confirm.
        result = result.replaceAll(pair[0], pair[1]);
    }
    return result;
}
/**
 * Lookup table from a Cyrillic letter to its Latin transliteration. Upper-case letters map
 * to upper-case Latin text and lower-case letters to lower-case Latin text; the hard and
 * soft signs both map to an apostrophe.
 */
private static final Map<Character, String> letters = new HashMap<>();

static {
    letters.put('А', "A");
    letters.put('Б', "B");
    letters.put('В', "V");
    letters.put('Г', "G");
    letters.put('Д', "D");
    letters.put('Е', "E");
    letters.put('Ё', "E");
    letters.put('Ж', "ZH");
    letters.put('З', "Z");
    letters.put('И', "I");
    letters.put('Й', "I");
    letters.put('К', "K");
    letters.put('Л', "L");
    letters.put('М', "M");
    letters.put('Н', "N");
    letters.put('О', "O");
    letters.put('П', "P");
    letters.put('Р', "R");
    letters.put('С', "S");
    letters.put('Т', "T");
    letters.put('У', "U");
    letters.put('Ф', "F");
    letters.put('Х', "H");
    letters.put('Ц', "C");
    letters.put('Ч', "CH");
    letters.put('Ш', "SH");
    letters.put('Щ', "SH");
    letters.put('Ъ', "'");
    letters.put('Ы', "Y");
    letters.put('Ь', "'");
    letters.put('Э', "E");
    letters.put('Ю', "U");
    letters.put('Я', "YA");
    letters.put('а', "a");
    letters.put('б', "b");
    letters.put('в', "v");
    letters.put('г', "g");
    letters.put('д', "d");
    letters.put('е', "e");
    letters.put('ё', "e");
    letters.put('ж', "zh");
    letters.put('з', "z");
    letters.put('и', "i");
    letters.put('й', "i");
    letters.put('к', "k");
    letters.put('л', "l");
    letters.put('м', "m");
    letters.put('н', "n");
    letters.put('о', "o");
    letters.put('п', "p");
    letters.put('р', "r");
    letters.put('с', "s");
    letters.put('т', "t");
    letters.put('у', "u");
    letters.put('ф', "f");
    letters.put('х', "h");
    letters.put('ц', "c");
    letters.put('ч', "ch");
    letters.put('ш', "sh");
    letters.put('щ', "sh");
    letters.put('ъ', "'");
    letters.put('ы', "y");
    letters.put('ь', "'");
    letters.put('э', "e");
    letters.put('ю', "u");
    letters.put('я', "ya");
}

/**
 * Transliterates Cyrillic letters to Latin text with one hash-map lookup per character.
 *
 * <p>Characters without a table entry are copied to the output unchanged.
 *
 * @param text the text to transliterate; may be {@code null}
 * @return the transliterated text, or {@code null} if {@code text} is {@code null}
 */
public static String toTranslit(String text) {
    if (text == null) {
        return null;
    }
    StringBuilder sb = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        // The table holds no null values, so a null result means "no mapping".
        String replacement = letters.get(c);
        if (replacement == null) {
            sb.append(c);
        } else {
            sb.append(replacement);
        }
    }
    return sb.toString();
}
/**
 * Transliteration table indexed directly by UTF-16 code unit; {@code null} means the
 * character has no mapping. It has one slot for every possible {@code char} value
 * (0 through {@code Character.MAX_VALUE}) so that any character can be used as an index.
 */
private static final String[] charTable = new String[Character.MAX_VALUE + 1];

static {
    charTable['А'] = "A";
    charTable['Б'] = "B";
    charTable['В'] = "V";
    charTable['Г'] = "G";
    charTable['Д'] = "D";
    charTable['Е'] = "E";
    charTable['Ё'] = "E";
    charTable['Ж'] = "ZH";
    charTable['З'] = "Z";
    charTable['И'] = "I";
    charTable['Й'] = "I";
    charTable['К'] = "K";
    charTable['Л'] = "L";
    charTable['М'] = "M";
    charTable['Н'] = "N";
    charTable['О'] = "O";
    charTable['П'] = "P";
    charTable['Р'] = "R";
    charTable['С'] = "S";
    charTable['Т'] = "T";
    charTable['У'] = "U";
    charTable['Ф'] = "F";
    charTable['Х'] = "H";
    charTable['Ц'] = "C";
    charTable['Ч'] = "CH";
    charTable['Ш'] = "SH";
    charTable['Щ'] = "SH";
    charTable['Ъ'] = "'";
    charTable['Ы'] = "Y";
    charTable['Ь'] = "'";
    charTable['Э'] = "E";
    charTable['Ю'] = "U";
    charTable['Я'] = "YA";
    // Derive each lower-case entry from its upper-case counterpart.
    for (int i = 0; i < charTable.length; i++) {
        String upperValue = charTable[i];
        if (upperValue != null && Character.isUpperCase(i)) {
            // Locale.ROOT keeps the result independent of the JVM default locale
            // (a Turkish default would otherwise turn "I" into a dotless "ı").
            charTable[Character.toLowerCase((char) i)] = upperValue.toLowerCase(Locale.ROOT);
        }
    }
}

/**
 * Transliterates Cyrillic letters to Latin text with one array lookup per character.
 *
 * <p>Characters without a table entry are copied to the output unchanged.
 *
 * @param text the text to transliterate; may be {@code null}
 * @return the transliterated text, or {@code null} if {@code text} is {@code null}
 */
public static String toTranslitNew(String text) {
    if (text == null) {
        return null;
    }
    StringBuilder sb = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        String replacement = charTable[c];
        if (replacement == null) {
            sb.append(c);
        } else {
            sb.append(replacement);
        }
    }
    return sb.toString();
}
/** Times {@code cyrillicToLatin} on a mixed-case, all-Cyrillic sample and returns its result. */
@Benchmark
public String testAllRussiansUB() {
    final String input = "фыовилФФЫПОВФывитмдилепщыоашыФЫВыоф";
    return cyrillicToLatin(input);
}
/** Times {@code cyrillicToLatin} on a Latin-only sample, where no substitution matches. */
@Benchmark
public String testAllEnglishUB() {
    final String input = "adsajhshkadSFBjsajAKSJfhakjsKJAsdha";
    return cyrillicToLatin(input);
}
/** Times the map-based {@code toTranslit} on a mixed-case, all-Cyrillic sample. */
@Benchmark
public String testAllRussiansMap() {
    final String input = "фыовилФФЫПОВФывитмдилепщыоашыФЫВыоф";
    return toTranslit(input);
}
/** Times the map-based {@code toTranslit} on a Latin-only sample, where every lookup misses. */
@Benchmark
public String testAllEnglishMap() {
    final String input = "adsajhshkadSFBjsajAKSJfhakjsKJAsdha";
    return toTranslit(input);
}
/** Times the array-based {@code toTranslitNew} on a mixed-case, all-Cyrillic sample. */
@Benchmark
public String testAllRussiansArray() {
    final String input = "фыовилФФЫПОВФывитмдилепщыоашыФЫВыоф";
    return toTranslitNew(input);
}
/** Times the array-based {@code toTranslitNew} on a Latin-only sample, where every lookup misses. */
@Benchmark
public String testAllEnglishArray() {
    final String input = "adsajhshkadSFBjsajAKSJfhakjsKJAsdha";
    return toTranslitNew(input);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment