Created
October 2, 2019 11:20
-
-
Save pawer13/947162e6bcc0d1ab9e663d7d078c3d3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package es.ortoplus.migration.util; | |
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.springframework.lang.NonNull;
public class StringOperations { | |
public final static String ADDRESS_KEYWORDS[] = { | |
"BAJO,BJ,BJO,BAJIO,BAJOS,BJS,BAIXOS,BAXIO,BAJIOS", | |
"LOCAL,LC,LCAL,LOCALES,LOCA,LCA,LCOAL", | |
"PUERTA,PTA,PUERT ,PURTA ", | |
"EDIFICIO,EDF,EDIF,EDIFI,EDIFIC", | |
"DCHA,DRCHA,DERECHA", | |
"BLOQUE,BLQ,BLOQ,BLOQE,BLOQES,BLOQUES", | |
"IZQ,IZDA,IQZ,IZQUIERDA,IZQD,IZQA,IZQDA", | |
"PLANTA,PLTA,PLANT", | |
"CALLE,C\\\\,C/,CARRER,C", | |
"PLAZA,PLAÇA,PL,PLZ", | |
"AVENIDA,AVDA,AVD,AVD,AV", | |
"CARRETERA,CTRA", | |
"Nº,NUMERO,NUM"}; | |
public final static String NAME_KEYWORDS[] = { | |
",DOCTORA,DOCTOR,DRA,DR", | |
"MARIA,Mª", | |
"FRANCISCO,FRAN,FCO", | |
",CLINICA", | |
",D,S,L"}; | |
public static String cleanAddress(@NonNull String address) { | |
return cleanString(cleanString(address, ADDRESS_KEYWORDS), NAME_KEYWORDS); | |
} | |
public static String cleanString(@NonNull String s, @NonNull String[] keywords) { | |
String result = StringUtils.stripAccents(s.toUpperCase()); | |
String [] fieldRest=result.split("[ \\.,;]"); | |
for (String kw: keywords) { | |
String words[] = kw.split(","); | |
for (int j = 1; j < words.length; j++) { | |
for (int index=0;index<fieldRest.length;index++) { | |
if (fieldRest[index].equals(words[j])) { | |
fieldRest[index]=words[0]; | |
} | |
} | |
} | |
} | |
return String.join(" ", fieldRest).trim(); | |
} | |
public static String cleanName(@NonNull String name) { | |
return cleanString(name, NAME_KEYWORDS); | |
} | |
public static double addressSimilarity(@NonNull String a, @NonNull String b) { | |
return stringSimilarity(cleanAddress(a), cleanAddress(b)); | |
} | |
public static double stringSimilarity(@NonNull String a,@NonNull String b) { | |
String[] splittedA = a.split(" "); | |
String[] splittedB = b.split(" "); | |
int score = 0; | |
for (String s : splittedA) { | |
for (String t : splittedB) { | |
if (s.equals(t)) { | |
score++; | |
} | |
} | |
} | |
int size = Integer.max(splittedA.length, splittedB.length); | |
return (double) score / size; | |
} | |
/** | |
* Compares two strings ignoring case and trailing spaces | |
* | |
* @param a | |
* @param b | |
* @return true if both are null or if StringUtils.equalsIgnoreCase(a.trim(), | |
* b.trim()) is true | |
*/ | |
public static boolean equalTrimmedStrings(String a, String b) { | |
if (a == null && b == null) { | |
return true; | |
} | |
if (a == null || b == null) { | |
return false; | |
} | |
return StringUtils.equalsIgnoreCase(a.trim(), b.trim()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment