Last active
November 13, 2019 14:58
-
-
Save fadziljusri/18924663f042b4bea1481a86feca0c8b to your computer and use it in GitHub Desktop.
Destructure address components from an input string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const DEFINED = require('./malaysia'); | |
/**
 * Return a copy of the string in title case: everything is lower-cased
 * first, then the first word character after each word boundary is
 * upper-cased.
 * @return {String}
 */
String.prototype.toTitleCase = function () {
  const lowered = this.toLowerCase();
  return lowered.replace(/\b(\w)/g, (initial) => initial.toUpperCase());
};
/**
 * Normalise raw address text into a single lower-case, space-separated line.
 *
 * - periods are dropped (so "No." becomes "no");
 * - commas and line breaks are turned into spaces, so parts written without
 *   a following space ("Bulan,Jalan") stay separate tokens instead of being
 *   fused together;
 * - the stand-alone word "malaysia" is removed;
 * - every run of whitespace (including a single tab) collapses to one space.
 *
 * @return {String} the normalised text, trimmed at both ends
 */
String.prototype.formatAdrressText = function () {
  return this.toLowerCase()
    .replace(/\./g, "") // drop abbreviation / trailing periods
    .replace(/[,\r\n]+/g, " ") // separators become spaces, never nothing
    .replace(/\bmalaysia\b/g, "") // whole word only, leaves e.g. "malaysian" intact
    .replace(/\s+/g, " ") // collapse any whitespace run to a single space
    .trim();
};
/**
 * Split the raw text on commas without stripping punctuation.
 * Each piece is lower-cased, has runs of two or more whitespace characters
 * collapsed to a single space, and is trimmed.
 * @return {String[]} the comma-separated segments, in input order
 */
String.prototype.mapAddressInput = function () {
  const segments = this.toLowerCase().split(",");
  const cleaned = [];
  for (const segment of segments) {
    cleaned.push(segment.replace(/\s{2,}/g, ' ').trim());
  }
  return cleaned;
};
/**
 * Tell whether a token looks like a postcode.
 *
 * The token must consist solely of 4 or 5 decimal digits and its numeric
 * value must lie in the range 1000..98859 (inclusive). Four digits are
 * accepted so that a postcode whose leading zero was dropped, or that was
 * passed as a number, is still recognised. Requiring plain digits rejects
 * values such as "1e3", "0x1000" or "1000.5" that merely coerce to a number
 * inside the range.
 *
 * @param {String|Number} [e] - token to check
 * @return {Boolean}
 */
function isPostcode(e) {
  const candidate = String(e).trim();
  if (!/^\d{4,5}$/.test(candidate)) {
    return false;
  }
  const value = Number(candidate);
  return value >= 1000 && value <= 98859;
}
/**
 * Tell whether a token is one of the recognised street keywords
 * ("jalan", "jln", "persiaran", "lorong"). The comparison is exact, so the
 * token is expected to be lower-cased already.
 * @param {String} [e] - token to check
 * @return {Boolean}
 */
function isStreet(e) {
  const STREET_KEYWORDS = {
    jalan: true,
    jln: true,
    persiaran: true,
    lorong: true,
  };
  return STREET_KEYWORDS[e] === true;
}
/**
 * Tell whether a token is the unit-number marker "no" (exact match).
 * @param {String} [e] - token to check
 * @return {Boolean}
 */
function isApt(e) {
  const APT_MARKER = "no";
  return APT_MARKER === e;
}
/**
 * Tell whether a token can continue a unit-number series, i.e. whether it
 * contains digits that form a finite number once every non-digit character
 * is stripped (so "10-2-6" and "18/39a" both qualify).
 *
 * Anything that is not a string yields false instead of throwing; a scan
 * that reads one position past the end of a token array passes `undefined`.
 *
 * @param {String} [e] - token to check
 * @return {Boolean}
 */
function isNextAptSeries(e) {
  if (typeof e !== "string") {
    return false;
  }
  const digits = e.replace(/\D+/g, ""); // \D matches non-digits
  return digits !== "" && Number.isFinite(Number(digits));
}
/**
 * Look for a defined place name (as listed in DEFINED) starting at `index`
 * of this token array and record it on `address`.
 *
 * Windows of `maxSyllables` tokens down to 1 token are joined with a space
 * and looked up in DEFINED. When the joined text is an own key of DEFINED
 * and `address` has no truthy value yet under the mapped field name, the
 * title-cased text is stored there and the matched tokens are removed from
 * this array (the array is mutated in place). The scan keeps going with
 * the shorter windows after a match, so the token that slides into `index`
 * is examined as well.
 *
 * Only own keys of DEFINED are considered, so tokens such as "constructor"
 * can never match an inherited Object.prototype member. The method is
 * installed as a non-enumerable property so it does not appear in
 * `for...in` loops over arrays.
 *
 * Keys of DEFINED made of more tokens than `maxSyllables` cannot match;
 * pass a larger value to reach them.
 *
 * @param {Object} [address] - accumulator that receives the matched fields
 * @param {Integer} [index] - position in this array where the window starts
 * @param {Integer} [maxSyllables] - largest window size in tokens (e.g. 'kuala lumpur': 2, 'pahang': 1)
 * @return {Object} the same `address` object, possibly with new fields set
 */
Object.defineProperty(Array.prototype, "getDefinedElement", {
  configurable: true,
  writable: true,
  enumerable: false,
  value: function getDefinedElement(address, index, maxSyllables = 2) {
    const hasOwn = Object.prototype.hasOwnProperty;
    for (let countSyl = maxSyllables; countSyl >= 1; countSyl--) {
      const text = this.slice(index, index + countSyl).join(" ");
      if (!hasOwn.call(DEFINED, text)) {
        continue;
      }
      const field = DEFINED[text];
      if (!address[field]) {
        address[field] = text.toTitleCase();
        this.splice(index, countSyl);
      }
    }
    return address;
  },
});
/**
 * Break a free-form address into components.
 *
 * The text is tokenised twice: once into comma-separated segments
 * (mapAddressInput) and once into a flat list of normalised words
 * (formatAdrressText). The word list is scanned left to right:
 *  - the marker "no" plus every following token accepted by isNextAptSeries
 *    becomes `apt`;
 *  - a token accepted by isPostcode becomes `postcode` (a later one
 *    overwrites an earlier one);
 *  - place names known to getDefinedElement fill the fields it maps them to;
 *  - the position of the last street keyword is remembered.
 * Afterwards the street is extracted only if some comma segment is exactly
 * the word sequence that starts at the remembered street keyword. Whatever
 * words are left over are joined into `section`.
 *
 * @param {String} text - raw address; null/undefined is treated as empty
 * @return {Object} object with `section` always set, plus `apt`,
 *   `postcode`, `street` and the getDefinedElement fields when detected
 */
function destructAddress(text) {
  const source = text == null ? "" : String(text);

  // Periods are stripped from the segments because the word list has none.
  const segments = source
    .mapAddressInput()
    .map((segment) => segment.replace(/\./g, "").trim());
  const unk = source
    .formatAdrressText()
    .split(" ")
    .filter((token) => token !== "");

  let address = {};
  let streetIndex;
  let index = 0;

  while (index < unk.length) {
    const element = unk[index];

    if (isApt(element)) {
      let countAptSeries = 1;
      // Bounded scan: never hand a position past the end to isNextAptSeries.
      while (
        index + countAptSeries < unk.length &&
        isNextAptSeries(unk[index + countAptSeries])
      ) {
        countAptSeries += 1;
      }
      address['apt'] = unk.splice(index, countAptSeries).join(" ").toTitleCase();
      continue;
    }

    if (isPostcode(element)) {
      address['postcode'] = element.toTitleCase();
      unk.splice(index, 1);
      continue;
    }

    const lengthBefore = unk.length;
    address = unk.getDefinedElement(address, index);
    if (unk.length !== lengthBefore) {
      // Tokens were consumed; re-examine whatever now sits at `index`.
      continue;
    }

    if (isStreet(element)) {
      streetIndex = index; // resolved after the scan, once other parts are removed
    }
    index += 1;
  }

  // The street is only recognised when it is wrapped between commas.
  if (streetIndex !== undefined) {
    const street = segments.find((segment) => {
      if (segment === "") {
        return false;
      }
      const wordCount = segment.split(" ").length;
      return unk.slice(streetIndex, streetIndex + wordCount).join(" ") === segment;
    });
    if (street !== undefined) {
      unk.splice(streetIndex, street.split(" ").length);
      address["street"] = street.toTitleCase();
    }
  }

  address['section'] = unk.join(" ").toTitleCase();
  return address;
}
// Demo run: parse one sample address and print the extracted components.
// Alternative samples that can be swapped in for `input`:
//   " No 11, Chendering, 21080 Kuala Terengganu, Terengganu."
//   `No L-4-1, Solaris Mont Kiara, Jalan Solaris, Kuala Lumpur 50480, Malaysia`
const input = `No 10-2-6 PANGSAPURI BULAN, Jalan 18/39A, taman sri sinar, 51201 Kuala Lumpur.`;
const result = destructAddress(input);
console.log(result);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Lookup table of known Malaysian place names.
 * Each key is a place name and each value is the address field it fills
 * ("state" or "city"). Keys are written in lower case with single spaces
 * between words, which is the form a lower-cased, space-joined token
 * sequence takes when it is used to index this table.
 */
const MALAYSIA = {
  // STATES
  "johor": "state",
  "kedah": "state",
  "kelantan": "state",
  "melaka": "state",
  "negeri sembilan": "state",
  "pahang": "state",
  "perak": "state",
  "perlis": "state",
  "pulau pinang": "state",
  "sarawak": "state",
  "selangor": "state",
  "terengganu": "state",
  "sabah": "state",
  "wilayah persekutuan": "state",
  // CITIES
  "johor bahru": "city",
  "tebrau": "city",
  "pasir gudang": "city",
  "bukit indah": "city",
  "skudai": "city",
  "kluang": "city",
  "batu pahat": "city",
  "muar": "city",
  "ulu tiram": "city",
  "senai": "city",
  "segamat": "city",
  "kulai": "city",
  "kota tinggi": "city",
  "pontian kechil": "city",
  "tangkak": "city",
  "bukit bakri": "city",
  "yong peng": "city",
  "pekan nenas": "city",
  "labis": "city",
  "mersing": "city",
  "simpang renggam": "city",
  "parit raja": "city",
  "kelapa sawit": "city",
  "buloh kasap": "city",
  "chaah": "city",
  "sungai petani": "city",
  "alor setar": "city",
  "kulim": "city",
  // listed separately so each name can be matched on its own
  "jitra": "city",
  "kubang pasu": "city",
  "baling": "city",
  "pendang": "city",
  "langkawi": "city",
  "yan": "city",
  "sik": "city",
  "kuala nerang": "city",
  "pokok sena": "city",
  "bandar baharu": "city",
  "kota bharu": "city",
  "pangkal kalong": "city",
  "tanah merah": "city",
  "peringat": "city",
  "wakaf baru": "city",
  "kadok": "city",
  "pasir mas": "city",
  "gua musang": "city",
  "kuala krai": "city",
  "tumpat": "city",
  "bandaraya melaka": "city",
  "bukit baru": "city",
  "ayer keroh": "city",
  "klebang": "city",
  "masjid tanah": "city",
  "sungai udang": "city",
  "batu berendam": "city",
  "alor gajah": "city",
  "bukit rambai": "city",
  "ayer molek": "city",
  "bemban": "city",
  "kuala sungai baru": "city",
  "pulau sebang": "city",
  "seremban": "city",
  "port dickson": "city",
  "nilai": "city",
  "bahau": "city",
  "tampin": "city",
  "kuala pilah": "city",
  "kuantan": "city",
  "temerloh": "city",
  "bentong": "city",
  "mentakab": "city",
  "raub": "city",
  "jerantut": "city",
  "pekan": "city",
  "kuala lipis": "city",
  "bandar jengka": "city",
  "bukit tinggi": "city",
  "ipoh": "city",
  "taiping": "city",
  "sitiawan": "city",
  "simpang empat": "city",
  "teluk intan": "city",
  "batu gajah": "city",
  "lumut": "city",
  "kampung koh": "city",
  "kuala kangsar": "city",
  "sungai siput utara": "city",
  "tapah": "city",
  "bidor": "city",
  "parit buntar": "city",
  "ayer tawar": "city",
  "bagan serai": "city",
  "tanjung malim": "city",
  "lawan kuda baharu": "city",
  "pantai remis": "city",
  "kampar": "city",
  "kangar": "city",
  "kuala perlis": "city",
  "bukit mertajam": "city",
  "georgetown": "city",
  "sungai ara": "city",
  "gelugor": "city",
  "ayer itam": "city",
  "butterworth": "city",
  "val d’or": "city",
  "val d'or": "city", // same place typed with a plain ASCII apostrophe
  "perai": "city",
  "nibong tebal": "city",
  "permatang kucing": "city",
  "tanjung tokong": "city",
  "kepala batas": "city",
  "tanjung bungah": "city",
  "juru": "city",
  "kota kinabalu": "city",
  "sandakan": "city",
  "tawau": "city",
  "lahad datu": "city",
  "keningau": "city",
  "putatan": "city",
  "donggongon": "city",
  "semporna": "city",
  "kudat": "city",
  "kunak": "city",
  "papar": "city",
  "ranau": "city",
  "beaufort": "city",
  "kinarut": "city",
  "kota belud": "city",
  "kuching": "city",
  "miri": "city",
  "sibu": "city",
  "bintulu": "city",
  "limbang": "city",
  "sarikei": "city",
  "sri aman": "city",
  "kapit": "city",
  "batu delapan bazaar": "city",
  "kota samarahan": "city",
  "subang jaya": "city",
  "klang": "city",
  "ampang jaya": "city",
  "shah alam": "city",
  "petaling jaya": "city",
  "cheras": "city",
  "kajang": "city",
  "selayang baru": "city",
  "rawang": "city",
  "taman greenwood": "city",
  "semenyih": "city",
  "banting": "city",
  "balakong": "city",
  "gombak setia": "city",
  "kuala selangor": "city",
  "serendah": "city",
  "bukit beruntung": "city",
  "pengkalan kundang": "city",
  "jenjarom": "city",
  "sungai besar": "city",
  "batu arang": "city",
  "tanjung sepat": "city",
  "kuang": "city",
  "kuala kubu baharu": "city",
  "batang berjuntai": "city",
  "bandar baru salak tinggi": "city",
  "sekinchan": "city",
  "sabak": "city",
  "tanjung karang": "city",
  "beranang": "city",
  "sungai pelek": "city",
  "kuala terengganu": "city",
  "chukai": "city",
  "dungun": "city",
  "kerteh": "city",
  "kuala berang": "city",
  "marang": "city",
  "paka": "city",
  "jerteh": "city",
  "kuala lumpur": "city",
  "labuan": "city",
  "putrajaya": "city"
};
module.exports = MALAYSIA |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment