Skip to content

Instantly share code, notes, and snippets.

@pawer13
Created October 2, 2019 11:20
Show Gist options
  • Save pawer13/947162e6bcc0d1ab9e663d7d078c3d3f to your computer and use it in GitHub Desktop.
Save pawer13/947162e6bcc0d1ab9e663d7d078c3d3f to your computer and use it in GitHub Desktop.
package es.ortoplus.migration.util;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.springframework.lang.NonNull;
/**
 * Static helpers that normalise free-text addresses and names and score how similar two
 * normalised strings are.
 */
public class StringOperations {

    /**
     * Synonym groups applied by {@link #cleanAddress(String)}. Each entry is a comma-separated
     * list: the first item is the canonical word and every following item is a variant that is
     * rewritten to that canonical word.
     */
    public static final String[] ADDRESS_KEYWORDS = {
        "BAJO,BJ,BJO,BAJIO,BAJOS,BJS,BAIXOS,BAXIO,BAJIOS",
        "LOCAL,LC,LCAL,LOCALES,LOCA,LCA,LCOAL",
        "PUERTA,PTA,PUERT ,PURTA ",
        "EDIFICIO,EDF,EDIF,EDIFI,EDIFIC",
        "DCHA,DRCHA,DERECHA",
        "BLOQUE,BLQ,BLOQ,BLOQE,BLOQES,BLOQUES",
        "IZQ,IZDA,IQZ,IZQUIERDA,IZQD,IZQA,IZQDA",
        "PLANTA,PLTA,PLANT",
        // NOTE(review): the second variant is "C" followed by TWO backslashes at runtime;
        // confirm whether a single backslash was intended.
        "CALLE,C\\\\,C/,CARRER,C",
        "PLAZA,PLAÇA,PL,PLZ",
        "AVENIDA,AVDA,AVD,AVD,AV",
        "CARRETERA,CTRA",
        "Nº,NUMERO,NUM"};

    /**
     * Synonym groups applied by {@link #cleanName(String)} (and, as a second pass, by
     * {@link #cleanAddress(String)}). Same format as {@link #ADDRESS_KEYWORDS}; an entry that
     * starts with a comma has an empty canonical word, so its variants are removed.
     */
    public static final String[] NAME_KEYWORDS = {
        ",DOCTORA,DOCTOR,DRA,DR",
        "MARIA,Mª",
        "FRANCISCO,FRAN,FCO",
        ",CLINICA",
        ",D,S,L"};

    /** Separators between the words of a raw address or name: whitespace, '.', ',' and ';'. */
    private static final Pattern TOKEN_SEPARATORS = Pattern.compile("[\\s.,;]+");

    /** Separator between the words of an already cleaned string. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Normalises an address by applying {@link #ADDRESS_KEYWORDS} and then
     * {@link #NAME_KEYWORDS}.
     *
     * @param address the raw address, not null
     * @return the normalised address
     */
    public static String cleanAddress(@NonNull String address) {
        return cleanString(cleanString(address, ADDRESS_KEYWORDS), NAME_KEYWORDS);
    }

    /**
     * Upper-cases {@code s}, strips its accents, splits it into words and rewrites every word
     * that equals a keyword variant to the canonical word of that keyword group.
     *
     * <p>Keyword groups are applied in array order, so a word rewritten by one group can be
     * rewritten again by a later group. Words whose canonical form is empty are dropped, and the
     * remaining words are joined with single spaces.
     *
     * @param s the text to normalise, not null
     * @param keywords comma-separated synonym groups, canonical word first, not null
     * @return the normalised text, without leading, trailing or repeated spaces
     */
    public static String cleanString(@NonNull String s, @NonNull String[] keywords) {
        List<String> tokens = splitNonEmpty(TOKEN_SEPARATORS, normalize(s));
        for (String kw : keywords) {
            String[] words = kw.split(",");
            if (words.length < 2) {
                continue; // no variants to rewrite
            }
            String canonical = words[0].trim();
            for (int j = 1; j < words.length; j++) {
                // The input has been upper-cased and accent-stripped, so the variant must be
                // normalised the same way or accented variants (e.g. "PLAÇA") never match.
                // Trimming fixes variants declared with stray spaces (e.g. "PUERT ").
                String variant = normalize(words[j]).trim();
                if (variant.isEmpty()) {
                    continue;
                }
                tokens.replaceAll(token -> token.equals(variant) ? canonical : token);
            }
        }
        // Words mapped to an empty canonical form are removed instead of leaving double spaces.
        tokens.removeIf(String::isEmpty);
        return String.join(" ", tokens);
    }

    /**
     * Normalises a name by applying {@link #NAME_KEYWORDS}.
     *
     * @param name the raw name, not null
     * @return the normalised name
     */
    public static String cleanName(@NonNull String name) {
        return cleanString(name, NAME_KEYWORDS);
    }

    /**
     * Scores the similarity of two addresses after normalising both with
     * {@link #cleanAddress(String)}.
     *
     * @param a first raw address, not null
     * @param b second raw address, not null
     * @return the {@link #stringSimilarity(String, String)} of the normalised addresses
     */
    public static double addressSimilarity(@NonNull String a, @NonNull String b) {
        return stringSimilarity(cleanAddress(a), cleanAddress(b));
    }

    /**
     * Scores how many whitespace-separated words two strings share.
     *
     * <p>Each word of {@code b} can be matched by at most one word of {@code a}, so repeated
     * words are not counted more than once and the result never exceeds 1.0.
     *
     * @param a first string, not null
     * @param b second string, not null
     * @return shared words divided by the word count of the longer string, in [0.0, 1.0];
     *         1.0 when neither string contains any word
     */
    public static double stringSimilarity(@NonNull String a, @NonNull String b) {
        List<String> wordsA = splitNonEmpty(WHITESPACE, a);
        List<String> unmatchedB = splitNonEmpty(WHITESPACE, b);
        int size = Math.max(wordsA.size(), unmatchedB.size());
        if (size == 0) {
            // Two blank strings are identical; also avoids 0/0 = NaN.
            return 1.0;
        }
        int score = 0;
        for (String word : wordsA) {
            // remove(Object) consumes one occurrence, so duplicates cannot be double-counted.
            if (unmatchedB.remove(word)) {
                score++;
            }
        }
        return (double) score / size;
    }

    /**
     * Compares two strings ignoring case and leading/trailing whitespace.
     *
     * @param a first string, may be null
     * @param b second string, may be null
     * @return true if both are null, or if neither is null and
     *         StringUtils.equalsIgnoreCase(a.trim(), b.trim()) is true
     */
    public static boolean equalTrimmedStrings(String a, String b) {
        if (a == null && b == null) {
            return true;
        }
        if (a == null || b == null) {
            return false;
        }
        return StringUtils.equalsIgnoreCase(a.trim(), b.trim());
    }

    /** Upper-cases (locale-independently) and strips accents, the form used for comparison. */
    private static String normalize(String text) {
        return StringUtils.stripAccents(text.toUpperCase(Locale.ROOT));
    }

    /** Splits {@code text} on {@code separator} and returns the non-empty pieces, in order. */
    private static List<String> splitNonEmpty(Pattern separator, String text) {
        List<String> pieces = new ArrayList<>();
        for (String piece : separator.split(text)) {
            if (!piece.isEmpty()) {
                pieces.add(piece);
            }
        }
        return pieces;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment