Last active
April 8, 2021 07:03
-
-
Save ufuk/b97df4b88c6241eec235e327d4c9ea0d to your computer and use it in GitHub Desktop.
Curated text utils for specific use cases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.commons.lang3.StringUtils; | |
import java.text.Normalizer; | |
import java.util.Arrays; | |
import java.util.List; | |
import java.util.Locale; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import java.util.stream.Collectors; | |
/**
 * Static text helpers: masking, word-wise truncation, accent stripping, slug
 * generation and boundary-delimited replacement.
 *
 * <p>All methods are stateless. Every method returns {@code null} when its
 * primary text argument is {@code null}.
 */
public final class TextUtils {

    private static final String SPACE = " ";
    private static final String EMPTY = "";

    /** One or more code points from the "Combining Diacritical Marks" Unicode block. */
    private static final Pattern DIACRITICAL_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /** Any single character that is not an ASCII letter or digit. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-zA-Z0-9]");

    /** A run of whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    private TextUtils() {
        // Utility class; not instantiable.
    }

    /**
     * Replaces the middle of {@code text} with repetitions of a placeholder,
     * keeping the first {@code start} and the last {@code end} characters visible.
     *
     * <p>The placeholder is emitted once per hidden character, so a multi-character
     * placeholder makes the result longer than the input. Negative {@code start} or
     * {@code end} values are treated as {@code 0}. When the visible prefix and suffix
     * together cover the whole text there is nothing to hide and the text is returned
     * unchanged (no characters are dropped or reordered).
     *
     * @param text                the text to mask; may be {@code null}
     * @param placeHolderSequence the sequence written for each hidden character;
     *                            {@code null} is treated as the empty string
     * @param start               number of leading characters to keep visible
     * @param end                 number of trailing characters to keep visible
     * @return the masked text, or {@code null} if {@code text} is {@code null}
     */
    public static String mask(String text, String placeHolderSequence, int start, int end) {
        if (text == null) {
            return null;
        }
        final int length = text.length();
        final int keptPrefix = Math.max(start, 0);
        final int keptSuffix = Math.max(end, 0);
        // long arithmetic so that huge start/end values cannot overflow into a positive count
        final long hiddenCount = (long) length - keptPrefix - keptSuffix;
        if (hiddenCount <= 0) {
            return text;
        }
        final String placeHolder = placeHolderSequence == null ? EMPTY : placeHolderSequence;
        final StringBuilder masked = new StringBuilder(length);
        masked.append(text, 0, keptPrefix);
        for (int i = 0; i < (int) hiddenCount; i++) {
            masked.append(placeHolder);
        }
        masked.append(text, length - keptSuffix, length);
        return masked.toString();
    }

    /**
     * Masks a card number with {@code '*'}, leaving only the first six characters visible.
     *
     * @param cardNumber the card number; may be {@code null}
     * @return the masked card number, or {@code null} if {@code cardNumber} is {@code null}
     */
    public static String maskBankCard(String cardNumber) {
        return mask(cardNumber, "*", 6, 0);
    }

    /**
     * Drops trailing words until the text is no longer than {@code maximumCharacterLimit}.
     *
     * <p>A text that already fits is returned untouched (including any surrounding
     * whitespace). Otherwise words are removed one at a time via
     * {@link #removeLastWordFromEnd(String)}; if even the first word does not fit, the
     * result is the empty string.
     *
     * @param text                  the text to shorten; may be {@code null}
     * @param maximumCharacterLimit the maximum allowed length of the result
     * @return the shortened text, or {@code null} if {@code text} is {@code null}
     */
    public static String removeWordsFromEndUntilLimitMatches(String text, int maximumCharacterLimit) {
        if (text == null) {
            return null;
        }
        String remaining = text;
        // The emptiness check terminates the loop when the limit is negative.
        while (remaining.length() > maximumCharacterLimit && !remaining.isEmpty()) {
            remaining = removeLastWordFromEnd(remaining);
        }
        return remaining;
    }

    /**
     * Removes the last space-separated word and normalises the remaining words to be
     * joined by single spaces with no leading or trailing whitespace.
     *
     * @param text the text to shorten; may be {@code null}
     * @return the text without its last word; the empty string when {@code text}
     *         contains no words; {@code null} if {@code text} is {@code null}
     */
    public static String removeLastWordFromEnd(String text) {
        if (text == null) {
            return null;
        }
        final List<String> words = Arrays.stream(text.trim().split(SPACE))
                .map(String::trim)
                .filter(word -> !isBlank(word))
                .collect(Collectors.toList());
        if (words.isEmpty()) {
            return EMPTY;
        }
        return String.join(SPACE, words.subList(0, words.size() - 1));
    }

    /**
     * Removes combining diacritical marks after canonical decomposition (NFD).
     *
     * <p>Characters that do not decompose into a base letter plus a combining mark
     * (for example the dotless {@code \u0131}) are left as they are.
     *
     * @param text the text to process; may be {@code null}
     * @return the text without combining marks, or {@code null} if {@code text} is {@code null}
     */
    public static String stripAccents(String text) {
        if (text == null) {
            return null;
        }
        final String decomposed = Normalizer.normalize(text, Normalizer.Form.NFD);
        return DIACRITICAL_MARKS.matcher(decomposed).replaceAll(EMPTY);
    }

    /**
     * Converts text into a lowercase, hyphen-separated token made only of ASCII
     * letters and digits.
     *
     * <p>Steps: map dotless {@code \u0131} to {@code i}, strip combining marks after
     * NFD decomposition, turn every remaining non-alphanumeric character into a
     * separator, collapse separator runs into a single {@code '-'} and lowercase the
     * result using {@link Locale#ENGLISH}.
     *
     * @param input the text to convert; may be {@code null}
     * @return the converted text, or {@code null} if {@code input} is {@code null}
     */
    public static String toBookmarkableText(String input) {
        if (input == null) {
            return null;
        }
        // Dotless i has no canonical decomposition, so NFD alone would not turn it into 'i'.
        String output = input.replace('\u0131', 'i');
        output = Normalizer.normalize(output, Normalizer.Form.NFD);
        output = DIACRITICAL_MARKS.matcher(output).replaceAll(EMPTY);
        output = NON_ALPHANUMERIC.matcher(output).replaceAll(SPACE).trim();
        output = WHITESPACE_RUN.matcher(output).replaceAll("-");
        return output.toLowerCase(Locale.ENGLISH);
    }

    /**
     * Replaces every fragment that begins with {@code startsWith}, ends with
     * {@code endsWith} and has {@code contains} in between.
     *
     * <p>A fragment never spans a second occurrence of {@code startsWith} before
     * {@code contains}, nor an earlier occurrence of {@code endsWith} after it.
     *
     * <p>NOTE: {@code startsWith}, {@code endsWith} and {@code contains} are inserted
     * into a regular expression verbatim, so regex metacharacters in them are
     * interpreted as such; {@code replaceWith} follows
     * {@link Matcher#replaceAll(String)} rules ({@code $} and {@code \} are special).
     * The pattern is compiled without flags, so a fragment cannot span line terminators.
     *
     * @param sourceText  the text to search; may be {@code null}
     * @param startsWith  regex fragment marking the start boundary
     * @param endsWith    regex fragment marking the end boundary
     * @param contains    regex fragment that must occur between the boundaries
     * @param replaceWith replacement for each matching fragment
     * @return the text with all matching fragments replaced, or {@code null} if
     *         {@code sourceText} is {@code null}
     */
    public static String replaceIfContainsWithinBoundaries(String sourceText, String startsWith, String endsWith, String contains, String replaceWith) {
        if (sourceText == null) {
            return null;
        }
        final String regex = startsWith
                + "((?!" + startsWith + ").)*?"
                + contains
                + "((?!" + endsWith + ").)*?"
                + endsWith;
        final Matcher matcher = Pattern.compile(regex).matcher(sourceText);
        return matcher.replaceAll(replaceWith);
    }

    /**
     * Removes every fragment that
     * {@link #replaceIfContainsWithinBoundaries(String, String, String, String, String)}
     * would replace. Text surrounding a removed fragment, including whitespace on
     * both sides, is kept as is.
     *
     * @param sourceText the text to search; may be {@code null}
     * @param startsWith regex fragment marking the start boundary
     * @param endsWith   regex fragment marking the end boundary
     * @param contains   regex fragment that must occur between the boundaries
     * @return the text without the matching fragments, or {@code null} if
     *         {@code sourceText} is {@code null}
     */
    public static String removeIfContainsWithinBoundaries(String sourceText, String startsWith, String endsWith, String contains) {
        return replaceIfContainsWithinBoundaries(sourceText, startsWith, endsWith, contains, EMPTY);
    }

    /** Returns {@code true} when every character of {@code value} is whitespace (or there are none). */
    private static boolean isBlank(String value) {
        return value.chars().allMatch(Character::isWhitespace);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.junit.jupiter.api.Test; | |
import java.util.Locale; | |
import static org.assertj.core.api.Assertions.assertThat; | |
class TextUtilsTests { | |
@Test | |
void shouldMask() { | |
assertThat(TextUtils.mask("5555666677778888", "-", 4, 2)).isEqualTo("5555----------88"); | |
} | |
@Test | |
void shouldMaskBankCard() { | |
assertThat(TextUtils.maskBankCard("1111222233334444")).isEqualTo("111122**********"); | |
} | |
@Test | |
void shouldNotRemoveWordFromEndUntilLimitMatchesWhenAlreadyLimitMatches() { | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches("Ali ata bak", 11)) | |
.isEqualTo("Ali ata bak"); | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches("Ali ata bak", 12)) | |
.isEqualTo("Ali ata bak"); | |
} | |
@Test | |
void shouldRemoveWordFromEndUntilLimitMatches() { | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches("Ali ata bak", 10)) | |
.isEqualTo("Ali ata"); | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches("Ali ata bak", 5)) | |
.isEqualTo("Ali"); | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches("Ali ata bak", 3)) | |
.isEqualTo("Ali"); | |
} | |
@Test | |
void shouldIgnoreSpacesAndRemoveWordFromEndUntilLimitMatches() { | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches(" Ali ata bak ", 10)) | |
.isEqualTo("Ali ata"); | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches("Ali ata bak ", 5)) | |
.isEqualTo("Ali"); | |
assertThat(TextUtils.removeWordsFromEndUntilLimitMatches(" Ali ata bak", 3)) | |
.isEqualTo("Ali"); | |
} | |
@Test | |
void shouldStripAccents() { | |
assertThat(TextUtils.stripAccents("ŞaŞKıN-Çocuk")) | |
.isEqualTo("SaSKıN-Cocuk"); | |
assertThat(TextUtils.stripAccents("ŞaŞKıN-Çocuk").toUpperCase(Locale.ENGLISH)) | |
.isEqualTo("SASKIN-COCUK"); | |
} | |
@Test | |
void shouldNormalizeToBookmarkableFormat() { | |
assertThat(TextUtils.toBookmarkableText("ŞaŞKıN-Çocuk")) | |
.isEqualTo("saskin-cocuk"); | |
assertThat(TextUtils.toBookmarkableText(" AyaĞı İncinmiş - ? Ağlamış-çöCük_ ___128?\\//()[]*0*37__abzürt")) | |
.isEqualTo("ayagi-incinmis-aglamis-cocuk-128-0-37-abzurt"); | |
} | |
@Test | |
void shouldReplaceIfContainsWithinBoundaries() { | |
final String text = "Bu bir listedir <li style=\"box-sizing: border-box;\">50 TL ve Üzeri Alışverişlerde KARGO ÜCRETSİZ</li> amanın " + | |
"Bu ise bir divdir <div>boş</div> " + | |
"Bu ise başka bir listedir <li>stilsiz bir liste</li>"; | |
assertThat(TextUtils.replaceIfContainsWithinBoundaries(text, "<li", "/li>", "KARGO ÜCRETSİZ", "LİSTE UÇTU")) | |
.isEqualTo("Bu bir listedir LİSTE UÇTU amanın " + | |
"Bu ise bir divdir <div>boş</div> " + | |
"Bu ise başka bir listedir <li>stilsiz bir liste</li>"); | |
assertThat(TextUtils.replaceIfContainsWithinBoundaries("Denemeci <li>liste elemanı 1</li> <li style=\"display: none;\">liste elemanı stilli</li> denemeci", ">", "<", "liste elemanı", "içi boş")) | |
.isEqualTo("Denemeci <liiçi boş/li> <li style=\"display: none;\"içi boş/li> denemeci"); | |
} | |
@Test | |
void shouldRemoveIfContainsWithinBoundaries() { | |
final String text = "Bu bir listedir <li style=\"box-sizing: border-box;\">50 TL ve Üzeri Alışverişlerde KARGO ÜCRETSİZ</li> amanın " + | |
"Bu ise bir divdir <div>boş</div> " + | |
"Bu ise başka bir listedir <li>stilsiz bir liste</li>"; | |
assertThat(TextUtils.removeIfContainsWithinBoundaries(text, "<li", "/li>", "KARGO ÜCRETSİZ")) | |
.isEqualTo("Bu bir listedir amanın " + | |
"Bu ise bir divdir <div>boş</div> " + | |
"Bu ise başka bir listedir <li>stilsiz bir liste</li>"); | |
assertThat(TextUtils.removeIfContainsWithinBoundaries(text, "<li", "/li>", "stilsiz")) | |
.isEqualTo("Bu bir listedir <li style=\"box-sizing: border-box;\">50 TL ve Üzeri Alışverişlerde KARGO ÜCRETSİZ</li> amanın " + | |
"Bu ise bir divdir <div>boş</div> " + | |
"Bu ise başka bir listedir "); | |
assertThat(TextUtils.removeIfContainsWithinBoundaries(text, "<", ">", "boş")) | |
.isEqualTo("Bu bir listedir <li style=\"box-sizing: border-box;\">50 TL ve Üzeri Alışverişlerde KARGO ÜCRETSİZ</li> amanın " + | |
"Bu ise bir divdir " + | |
"Bu ise başka bir listedir <li>stilsiz bir liste</li>"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment