Skip to content

Instantly share code, notes, and snippets.

@fadziljusri
Last active November 13, 2019 14:58
Show Gist options
  • Save fadziljusri/18924663f042b4bea1481a86feca0c8b to your computer and use it in GitHub Desktop.
Save fadziljusri/18924663f042b4bea1481a86feca0c8b to your computer and use it in GitHub Desktop.
Destructure address components from an input string
const DEFINED = require('./malaysia');
/**
 * Title-case a string: lower-case everything, then upper-case the first
 * word character that follows each word boundary.
 * @return {String} the title-cased copy of the receiver
 */
String.prototype.toTitleCase = function () {
  const lowered = this.toLowerCase();
  const capitalise = (firstChar) => firstChar.toUpperCase();
  return lowered.replace(/\b(\w)/g, capitalise);
};
/**
 * Normalise raw address text into a lower-cased, single-spaced string that
 * can be tokenised with `split(" ")`.
 *
 * Commas and newlines are separators, so they are turned into spaces rather
 * than deleted; deleting them would fuse neighbouring tokens
 * (e.g. "Lumpur,Malaysia" or a multi-line address). Periods are dropped, the
 * literal "malaysia" is removed, and every whitespace run (including tabs)
 * collapses to a single space.
 * @return {String} the normalised address text
 */
String.prototype.formatAdrressText = function () {
  return this
    .replace(/[,\n]/g, " ") // separators become spaces so tokens stay apart
    .replace(/\./g, "") // periods carry no information for parsing
    .toLowerCase()
    .replace(new RegExp("malaysia", 'g'), "") // country name is unnecessary
    .replace(/\s+/g, " ") // collapse any whitespace run to one space
    .trim();
};
/**
 * Break the raw (unformatted) address text into its comma-separated
 * segments. Each segment is lower-cased, has runs of two or more whitespace
 * characters squeezed to one space, and is trimmed.
 * @return {String[]} the cleaned segments, in input order
 */
String.prototype.mapAddressInput = function () {
  const segments = this.toLowerCase().split(",");
  const cleaned = [];
  for (const segment of segments) {
    const squeezed = segment.replace(/\s{2,}/g, ' ');
    cleaned.push(squeezed.trim());
  }
  return cleaned;
};
/**
 * Tell whether a token is a Malaysian postcode.
 *
 * A postcode is written as exactly five decimal digits whose numeric value
 * lies between 01000 and 98859. Requiring the five-digit form keeps
 * exponent, hex or decimal notations ("1e3", "0x3E8", "1000.5") and
 * four-digit house/lot numbers from being mistaken for a postcode.
 * @param {String} [e] - token to test
 * @return {Boolean} true when the token is a postcode
 */
function isPostcode(e) {
  const token = String(e);
  if (!/^\d{5}$/.test(token)) {
    return false;
  }
  const value = Number(token);
  return 1000 <= value && value <= 98859;
}
/**
 * Tell whether a token is one of the recognised street keywords
 * ("jalan", "jln", "persiaran", "lorong"). The comparison is exact, so the
 * token is expected to be lower-cased already.
 * @param {String} [e] - token to test
 * @return {Boolean} true when the token is a street keyword
 */
function isStreet(e) {
  switch (e) {
    case "jalan":
    case "jln":
    case "persiaran":
    case "lorong":
      return true;
    default:
      return false;
  }
}
/**
 * Tell whether a token is the unit-number marker "no" that introduces an
 * apartment/house number. The comparison is exact (lower-case only).
 * @param {String} [e] - token to test
 * @return {Boolean} true when the token equals "no"
 */
function isApt(e) {
  const unitMarker = "no";
  return unitMarker === e;
}
/**
 * Tell whether a token still belongs to an apartment/unit number series,
 * i.e. whether it contains at least one digit ("10-2-6", "l-4-1", "11").
 *
 * Non-string input (for example `undefined`, which a caller gets when it
 * reads past the last token) is reported as `false` instead of throwing.
 * @param {String} [e] - token to test
 * @return {Boolean} true when the token contains digits
 */
function isNextAptSeries(e) {
  if (typeof e !== "string") {
    return false;
  }
  const digits = e.replace(/\D+/g, ""); // keep only the digits
  return digits !== "" && Number.isFinite(Number(digits));
}
/**
 * Look for a defined place name (state or city) starting at `index` in this
 * token array and record it on `address`.
 *
 * Candidates are tried from the longest (`maxSyllables` tokens) down to a
 * single token. When the candidate is a key of DEFINED and the matching
 * field ("state"/"city") is not yet set on `address`, the title-cased text
 * is stored and the matched tokens are removed from this array (the array is
 * mutated in place). The scan keeps going after a match, so a shorter name
 * that slides into `index` can be picked up in the same call.
 *
 * Only DEFINED's own keys count: inherited members such as "constructor" or
 * "__proto__" must not be mistaken for place names.
 * @param {Object} [address] - address accumulated so far (mutated)
 * @param {Integer} [index] - index of the first token to check
 * @param {Integer} [maxSyllables] - max number of tokens in a name (e.g. 'kuala lumpur': 2, 'pahang': 1)
 * @return {Object} the same `address` object, possibly with city/state added
 */
Array.prototype.getDefinedElement = function (address, index, maxSyllables = 2) {
  for (let countSyl = maxSyllables; countSyl >= 1; countSyl--) {
    const text = this.slice(index, index + countSyl).join(" ");
    if (!Object.prototype.hasOwnProperty.call(DEFINED, text)) {
      continue;
    }
    const field = DEFINED[text];
    if (field && !address[field]) {
      address[field] = text.toTitleCase();
      this.splice(index, countSyl);
    }
  }
  return address;
};
/**
 * Split a free-text Malaysian address into its components.
 *
 * The text is tokenised on spaces after normalisation and scanned left to
 * right. Recognised tokens are removed from the token list as they are
 * consumed:
 *   - "no" plus the following digit-bearing tokens -> `apt`
 *   - a postcode token                             -> `postcode`
 *   - names found in DEFINED                       -> `city` / `state`
 * A street is only extracted afterwards, and only when the street keyword
 * and the words after it form exactly one comma-separated segment of the
 * original text. Whatever is left over is returned as `section`.
 * @param {String} text - raw address text
 * @return {Object} address with any of apt, postcode, city, state, street,
 *   plus section (always present, possibly empty)
 */
function destructAddress(text) {
  const unformattedUnk = text.mapAddressInput();
  const unk = text.formatAdrressText().split(" ");
  let index = 0;
  let address = {};
  let streetIndex;

  while (unk[index]) {
    const element = unk[index];

    if (isApt(element)) {
      let countAptSeries = 1;
      // Stop at the end of the token list: reading past it would hand
      // `undefined` to isNextAptSeries (e.g. for the input "No 11").
      while (
        index + countAptSeries < unk.length &&
        isNextAptSeries(unk[index + countAptSeries])
      ) {
        countAptSeries += 1;
      }
      address['apt'] = unk.splice(index, countAptSeries).join(" ").toTitleCase();
      continue;
    }

    if (isPostcode(element)) {
      address['postcode'] = element.toTitleCase();
      unk.splice(index, 1);
      continue;
    }

    address = unk.getDefinedElement(address, index);

    if (isStreet(element)) {
      streetIndex = index; // resolved after the loop
    }

    // Advance only when nothing was spliced out at this position.
    if (element === unk[index]) index += 1;
  }

  // WORST CASE!!! Only succeeds if the street is wrapped between ','
  if (streetIndex !== undefined) {
    let street = unformattedUnk.find(e => e.includes(unk[streetIndex]));
    if (street) {
      const streetCount = street.split(" ").length;
      const checkStreetSeq = unk.slice(streetIndex, streetIndex + streetCount).join(" ");
      if (street === checkStreetSeq) {
        street = unk.splice(streetIndex, streetCount).join(" ").toTitleCase();
        address["street"] = street;
      }
    }
  }

  address['section'] = unk.join(" ").toTitleCase();
  return address;
}
// Demo run: parse one sample address and print the extracted components.
// Other sample inputs to try:
//   " No 11, Chendering, 21080 Kuala Terengganu, Terengganu."
//   `No L-4-1, Solaris Mont Kiara, Jalan Solaris, Kuala Lumpur 50480, Malaysia`
const input = `No 10-2-6 PANGSAPURI BULAN, Jalan 18/39A, taman sri sinar, 51201 Kuala Lumpur.`;
const result = destructAddress(input);
console.log(result);
/**
 * Lookup table of Malaysian place names.
 *
 * Each key is a place name and each value is the address field it fills
 * ("state" or "city"). Keys are all lower-case because the address parser
 * looks names up using lower-cased text; a mixed-case key could never match.
 */
const MALAYSIA = {
  // STATES
  "johor": "state",
  "kedah": "state",
  "kelantan": "state",
  "melaka": "state",
  "negeri sembilan": "state",
  "pahang": "state",
  "perak": "state",
  "perlis": "state",
  "pulau pinang": "state",
  "sarawak": "state",
  "selangor": "state",
  "terengganu": "state",
  "sabah": "state",
  "wilayah persekutuan": "state",
  // CITIES
  "johor bahru": "city",
  "tebrau": "city",
  "pasir gudang": "city",
  "bukit indah": "city",
  "skudai": "city",
  "kluang": "city",
  "batu pahat": "city",
  "muar": "city",
  "ulu tiram": "city",
  "senai": "city",
  "segamat": "city",
  "kulai": "city",
  "kota tinggi": "city",
  "pontian kechil": "city",
  "tangkak": "city",
  "bukit bakri": "city",
  "yong peng": "city",
  "pekan nenas": "city",
  "labis": "city",
  "mersing": "city",
  "simpang renggam": "city",
  "parit raja": "city",
  "kelapa sawit": "city",
  "buloh kasap": "city",
  "chaah": "city",
  "sungai petani": "city",
  "alor setar": "city",
  "kulim": "city",
  "jitra / kubang pasu": "city",
  "baling": "city",
  "pendang": "city",
  "langkawi": "city",
  "yan": "city",
  "sik": "city",
  "kuala nerang": "city",
  "pokok sena": "city",
  "bandar baharu": "city",
  "kota bharu": "city",
  "pangkal kalong": "city",
  "tanah merah": "city",
  "peringat": "city",
  "wakaf baru": "city",
  "kadok": "city",
  "pasir mas": "city",
  "gua musang": "city",
  "kuala krai": "city",
  "tumpat": "city",
  "bandaraya melaka": "city",
  "bukit baru": "city",
  "ayer keroh": "city",
  "klebang": "city",
  "masjid tanah": "city",
  "sungai udang": "city",
  "batu berendam": "city",
  "alor gajah": "city",
  "bukit rambai": "city",
  "ayer molek": "city",
  "bemban": "city",
  "kuala sungai baru": "city",
  "pulau sebang": "city",
  "seremban": "city",
  "port dickson": "city",
  "nilai": "city",
  "bahau": "city",
  "tampin": "city",
  "kuala pilah": "city",
  "kuantan": "city",
  "temerloh": "city",
  "bentong": "city",
  "mentakab": "city",
  "raub": "city",
  "jerantut": "city",
  "pekan": "city",
  "kuala lipis": "city",
  "bandar jengka": "city",
  "bukit tinggi": "city",
  "ipoh": "city",
  "taiping": "city",
  "sitiawan": "city",
  "simpang empat": "city",
  "teluk intan": "city",
  "batu gajah": "city",
  "lumut": "city",
  "kampung koh": "city",
  "kuala kangsar": "city",
  "sungai siput utara": "city",
  "tapah": "city",
  "bidor": "city",
  "parit buntar": "city",
  "ayer tawar": "city",
  "bagan serai": "city",
  "tanjung malim": "city",
  "lawan kuda baharu": "city",
  "pantai remis": "city",
  "kampar": "city",
  "kangar": "city",
  "kuala perlis": "city",
  "bukit mertajam": "city",
  "georgetown": "city",
  "sungai ara": "city",
  "gelugor": "city",
  "ayer itam": "city",
  "butterworth": "city",
  "val d’or": "city",
  "perai": "city",
  "nibong tebal": "city",
  "permatang kucing": "city",
  "tanjung tokong": "city",
  "kepala batas": "city",
  "tanjung bungah": "city",
  "juru": "city",
  "kota kinabalu": "city",
  "sandakan": "city",
  "tawau": "city",
  "lahad datu": "city",
  "keningau": "city",
  "putatan": "city",
  "donggongon": "city",
  "semporna": "city",
  "kudat": "city",
  "kunak": "city",
  "papar": "city",
  "ranau": "city",
  "beaufort": "city",
  "kinarut": "city",
  "kota belud": "city",
  "kuching": "city",
  "miri": "city",
  "sibu": "city",
  "bintulu": "city",
  "limbang": "city",
  "sarikei": "city",
  "sri aman": "city",
  "kapit": "city",
  "batu delapan bazaar": "city",
  "kota samarahan": "city",
  "subang jaya": "city",
  "klang": "city",
  "ampang jaya": "city",
  "shah alam": "city",
  "petaling jaya": "city",
  "cheras": "city",
  "kajang": "city",
  "selayang baru": "city",
  "rawang": "city",
  "taman greenwood": "city",
  "semenyih": "city",
  "banting": "city",
  "balakong": "city",
  "gombak setia": "city",
  "kuala selangor": "city",
  "serendah": "city",
  "bukit beruntung": "city",
  "pengkalan kundang": "city",
  "jenjarom": "city",
  "sungai besar": "city",
  "batu arang": "city",
  "tanjung sepat": "city",
  "kuang": "city",
  "kuala kubu baharu": "city",
  "batang berjuntai": "city",
  "bandar baru salak tinggi": "city",
  "sekinchan": "city",
  "sabak": "city",
  "tanjung karang": "city",
  "beranang": "city",
  "sungai pelek": "city",
  "kuala terengganu": "city",
  "chukai": "city",
  "dungun": "city",
  "kerteh": "city",
  "kuala berang": "city",
  "marang": "city",
  "paka": "city",
  "jerteh": "city",
  "kuala lumpur": "city",
  "labuan": "city",
  "putrajaya": "city"
};
module.exports = MALAYSIA
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment