Last active
March 13, 2020 08:11
-
-
Save slav123/0a800d5dd7f1b2584097e9a5d6dd7b4a to your computer and use it in GitHub Desktop.
Australian address parser based on a JS library - DEV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
if ( ! function_exists('parse_address')) | |
{ | |
// https://github.com/gordonlkc/autralia-address-parser/blob/master/parser.js | |
/**
 * Find the Australian state or territory mentioned in an address.
 *
 * Abbreviations (e.g. "NSW") are tried first, then full names (e.g.
 * "New South Wales"). Matching is case-sensitive and whole-word; the
 * matched text is returned exactly as it appears in the address.
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return string the matched abbreviation or full name, or "" when none is found
 */
function _get_state(string &$address): string
{
    $state_code = [
        "Australian Capital Territory" => "ACT",
        "New South Wales"              => "NSW",
        // No trailing space in the key: a trailing space followed by \b can
        // only match when another word follows the name.
        "Northern Territory"           => "NT",
        "Queensland"                   => "QLD",
        "South Australia"              => "SA",
        "Tasmania"                     => "TAS",
        "Victoria"                     => "VIC",
        "Western Australia"            => "WA",
    ];

    // Abbreviations take priority over full names.
    $candidate_sets = [array_values($state_code), array_keys($state_code)];

    foreach ($candidate_sets as $candidates) {
        // Both patterns need the "/" delimiters; without them preg_match()
        // fails to compile the pattern and never matches.
        $pattern = "/\b(?:" . join("|", $candidates) . ")\b/";
        if (preg_match($pattern, $address, $match) === 1) {
            return $match[0];
        }
    }

    return "";
}
/**
 * Extract the postcode from an address.
 *
 * The first run of four digits in the address is taken as the postcode. An
 * optional separator ("-" or space) and a second four-digit group are
 * tolerated by the pattern but are not part of the returned value.
 *
 * Because the return type is int, a leading zero is not preserved
 * (e.g. "0800" is returned as 800).
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return int the postcode, or 0 when no four-digit run is found
 */
function _get_postcode(string &$address): int
{
    if (preg_match("/(?<postcode>\\d{4})[- ]?(?<plus4>\\d{4})?/", $address, $match) === 1) {
        // Return only the named group: the whole match may also contain the
        // separator and the second digit group, which is not a clean integer.
        return (int) $match['postcode'];
    }

    return 0;
}
/**
 * Parse a free-form address into its components.
 *
 * Each component is extracted independently from the full address string by
 * the matching helper. The suburb is derived from the "suburb and state"
 * match by removing the street name, the state and the street type from it.
 *
 * @param string $address the address to parse
 *
 * @return array associative array with the keys 'state', 'postcode',
 *               'streetType', 'streetNumber', 'streetName' and 'suburb'
 */
function parse_address(string $address): array
{
    $postcode = _get_postcode($address);
    $state = _get_state($address);
    $suburb_and_state = _get_suburb_and_state($address, $state);

    // _get_street_type() is declared ?string, while _get_street_name() takes a
    // non-nullable string by reference, so normalise a possible null to "".
    $street_type = (string) _get_street_type($address);
    $street_number = _get_street_number($address);
    $street_name = _get_street_name($address, $street_type, $street_number);

    // The fraction and unit-type helpers are not called here: their results
    // were never part of the returned array.
    $suburb = trim(str_replace([$street_name, $state, $street_type], '', $suburb_and_state));

    return [
        'state'        => $state,
        'postcode'     => $postcode,
        'streetType'   => $street_type,
        'streetNumber' => $street_number,
        'streetName'   => $street_name,
        'suburb'       => $suburb,
    ];
}
/**
 * Return the first run of two or more digits found in the address.
 *
 * NOTE(review): the function name suggests a street-number fraction such as
 * "1/2", but the pattern as written only matches consecutive digits; it may
 * have lost an escaped "/" between the two digit groups. Confirm the intent
 * before relying on this value.
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return int the matched digits as an integer, or 0 when nothing matches
 */
function _get_fraction(string &$address): int
{
    if (preg_match("/\d+\d+/", $address, $match) === 1) {
        return (int) $match[0];
    }

    // Without this guard a non-matching address reads an undefined offset and
    // returns null from an int-typed function.
    return 0;
}
/**
 * Extract the first street number (or number range) from an address.
 *
 * Matches the first run of digits, optionally followed by "-" and more
 * digits (e.g. "12" or "12-14"), that is immediately followed by a non-digit
 * character. Uses the same pattern as _get_street_number().
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return string the matched number text, or "" when nothing matches
 */
function _get_street_name_number(string &$address): string
{
    if (preg_match("/(?<streetNumber>\d+-?\d*)(?=\D)/", $address, $match) === 1) {
        return $match['streetNumber'];
    }

    // Without this guard a non-matching address reads an undefined offset and
    // returns null from a string-typed function.
    return "";
}
/**
 * Extract the first street number (or number range) from an address.
 *
 * Matches the first run of digits, optionally followed by "-" and more
 * digits (e.g. "12" or "12-14"), that is immediately followed by a non-digit
 * character.
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return string the matched number text, or "" when nothing matches
 */
function _get_street_number(string &$address): string
{
    if (preg_match("/(?<streetNumber>\d+-?\d*)(?=\D)/", $address, $match) === 1) {
        return $match['streetNumber'];
    }

    // Without this guard a non-matching address reads an undefined offset and
    // returns null from a string-typed function.
    return "";
}
/**
 * Extract the street name that precedes a known street type.
 *
 * The value returned is the "streetName_2" group of the pattern (the text
 * before the street type, up to the first comma), with every occurrence of
 * the street number removed and surrounding whitespace trimmed.
 *
 * NOTE(review): the pattern still contains the literal text
 * '+MatchedAddress.direct+' in its prefix/suffix groups, which looks like
 * leftover string concatenation from the JavaScript source this was ported
 * from. Those groups are kept unchanged here; confirm what they should match.
 *
 * @param string $address       address to search (taken by reference, never modified)
 * @param string $type          street type as it appears in the address (e.g. "Street")
 * @param string $street_number street number text to strip from the result
 *
 * @return string the street name, or "" when it cannot be determined
 */
function _get_street_name(string &$address, string &$type, string &$street_number): string
{
    // An empty type would leave a bare "+" quantifier in the pattern, which
    // does not compile; there is nothing to anchor the street name on anyway.
    if ($type === '') {
        return '';
    }

    // Quote the type so regex metacharacters in it are matched literally.
    $quoted_type = preg_quote($type, '/');

    // @2DO: wtf is MatchedAddress.direct
    $pattern = "/(?:(?:(?<streetName_0>'++')\w+ (?<streetType_0>{$quoted_type}+)\b)"
        . "|(?:(?<prefix_0>'+MatchedAddress.direct+')\w+)?"
        . "(?:(?<streetName_1>[^,]*\\d)(?:[^\w,]*(?<suffix_1>'+MatchedAddress.direct+')\b)"
        . "|(?<streetName_2>[^,]+)(?:[^\w,]+(?<streetType_2>{$quoted_type}+)\b)(?:[^\w,]+(?<suffix_2>'+MatchedAddress.direct+')\b)?"
        . "|(?<street_3>[^,]+?)(?:[^\w,]+(?<streetType_3>{$quoted_type}+)\b)?(?:[^\w,]+(?<suffix_3>'+MatchedAddress.direct+')\b)?))/";

    if (preg_match($pattern, $address, $match) !== 1) {
        return '';
    }

    // "streetName_2" is capture group 6; it is absent when an earlier
    // alternative matched and empty when a later one did.
    $street = $match['streetName_2'] ?? '';

    return trim(str_replace($street_number, '', $street));
}
/**
 * Extract the suburb together with the state that follows it.
 *
 * Matches the shortest run of characters containing no digit and no comma
 * that is followed by at least one non-word character and then the given
 * state text. The whole match (suburb, separator and state) is returned
 * untrimmed.
 *
 * @param string $address address to search (taken by reference, never modified)
 * @param string $state   state text as it appears in the address (e.g. "NSW")
 *
 * @return string the matched "suburb + state" text, or "" when nothing matches
 */
function _get_suburb_and_state(string &$address, string &$state): string
{
    // Quote the state so regex metacharacters in it are matched literally.
    $pattern = "/(?:(?<suburb>[^\\d,]+?)\\W+(?<state>" . preg_quote($state, "/") . "))/";

    if (preg_match($pattern, $address, $match) === 1) {
        return $match[0];
    }

    // Without this guard a non-matching address reads an undefined offset and
    // returns null from a string-typed function.
    return "";
}
/**
 * Find the street type (e.g. "Street", "Rd") mentioned in an address.
 *
 * Abbreviations are tried first, then full names. Matching is
 * case-insensitive and whole-word. The matched text is returned as it appears
 * in the address with its first character upper-cased.
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return string|null the matched street type, or "" when none is found
 */
function _get_street_type(string &$address): ?string
{
    $Street_Type = [
        'Alley'        => 'AL',
        'Arcade'       => 'ARC',
        'Avenue'       => 'AVE',
        'Boulevard'    => 'BLV',
        'Bend'         => 'BND',
        'Bypass'       => 'BPS',
        'Brace'        => 'BR',
        'Circuit'      => 'CCT',
        'Chase'        => 'CH',
        'Circle'       => 'CIR',
        'Close'        => 'CL',
        'Common'       => 'CMN',
        'Concourse'    => 'CNC',
        'Corner'       => 'CNR',
        'Circus'       => 'CRC',
        'Crescent'     => 'CRS',
        'Crossing'     => 'CSG',
        'Corso'        => 'CSO',
        'Court'        => 'CT',
        'Centre'       => 'CTR',
        'Cove'         => 'CVE',
        'Causeway'     => 'CWY',
        'Drive'        => 'DR',
        'Driveway'     => 'DRY',
        'Entrance'     => 'ENT',
        'Esplanade'    => 'ESP',
        'Expressway'   => 'EXP',
        'Fairway'      => 'FAY',
        'Frontage'     => 'FR',
        'Freeway'      => 'FWY',
        'Garden'       => 'GDN',
        'Glade'        => 'GL',
        'Glen'         => 'GLN',
        'Grange'       => 'GRA',
        'Ground'       => 'GRD',
        'Green'        => 'GRN',
        'Gate'         => 'GTE',
        'Grove'        => 'GVE',
        'Heights'      => 'HTS',
        'Highway'      => 'HWY',
        'Junction'     => 'JN',
        'Key'          => 'KEY',
        'Lane'         => 'LA',
        'Link'         => 'LK',
        'Loop'         => 'LP',
        'Mall'         => 'ML',
        'Mount'        => 'MT',
        'Mews'         => 'MW',
        'Motorway'     => 'MWY',
        'Nook'         => 'NK',
        'Outlook'      => 'OUT',
        'Parade'       => 'PDE',
        'Place'        => 'PL',
        'Plaza'        => 'PLZ',
        'Point'        => 'PNT',
        'Promenade'    => 'PRM',
        'Pass'         => 'PSS',
        'Path'         => 'PT',
        'Parkway'      => 'PWY',
        'Quadrant'     => 'QD',
        'Quadrangle'   => 'QDG',
        'Quay'         => 'QY',
        'Road'         => 'RD',
        'Ridge'        => 'RDG',
        'Roadway'      => 'RDY',
        'Reserve'      => 'RES',
        'Rise'         => 'RI',
        'Round'        => 'RN',
        'Row'          => 'ROW',
        'Rest'         => 'RST',
        'Retreat'      => 'RT',
        'Route'        => 'RTE',
        'Right of Way' => 'RTW',
        'Siding'       => 'SDG',
        'Square'       => 'SQ',
        'Street'       => 'ST',
        // Kept from the original table; "STS" and "ST" are both already
        // covered by the abbreviation list built from the values.
        'STS'          => 'ST',
        'Streets'      => 'STS',
        'Terrace'      => 'TCE',
        'Track'        => 'TR',
        'Trail'        => 'TRL',
        'Tollway'      => 'TWY',
        'View'         => 'VW',
        'Way'          => 'WAY',
        'Walk'         => 'WK',
        'Walkway'      => 'WKY',
        'Wynd'         => 'WND',
    ];

    // Abbreviations take priority over full names.
    $candidate_sets = [array_values($Street_Type), array_keys($Street_Type)];

    foreach ($candidate_sets as $candidates) {
        $pattern = "/\b(?:" . join("|", $candidates) . ")\b/i";
        if (preg_match($pattern, $address, $match) === 1) {
            return ucfirst($match[0]);
        }
    }

    // Return a real string rather than false: false violates the declared
    // ?string return type (TypeError under strict types), and callers pass
    // this value on to parameters typed as non-nullable string.
    return '';
}
/**
 * Find a unit designator that is normally followed by a number
 * (e.g. "Suite", "Unit", "Apt", "PO Box").
 *
 * Matching is case-insensitive. The designator must start at a word boundary
 * and must not be directly followed by another letter, so it is not picked
 * out of the middle of a longer word.
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return string|null the designator as it appears in the address, or null when none is found
 */
function _get_unit_type_numbered(string &$address): ?string
{
    // "(?<name>...)" is the named-group syntax; "(?:<name>..." is a plain
    // group that requires the literal text "<name>" in front of the first
    // alternative.
    $pattern = "/\b(?<unitType_1>suite|p\W*[om]\W*b(?:ox)?|(?:ap|dep)(?:ar)?t(?:me?nt)?|ro*m|flo*r?|unit|building|hangar|lot|pier|slip|space?|stop|trailer|PO box|P\.O\. box)(?![a-z])/i";

    if (preg_match($pattern, $address, $match) === 1) {
        return $match[0];
    }

    // null (not false) is the "not found" value allowed by the ?string type.
    return null;
}
/**
 * Find a unit designator that normally stands without a number
 * (e.g. "Rear", "Basement", "Penthouse").
 *
 * Matching is case-insensitive and whole-word, so a designator embedded in a
 * longer word (e.g. "side" in "Riverside") is not reported.
 *
 * @param string $address address to search (taken by reference, never modified)
 *
 * @return string|null the designator as it appears in the address, or null when none is found
 */
function _get_unit_type_unnumbered(string &$address): ?string
{
    $pattern = "/\b(?<unitType_2>basement|front|lobby|lower|office|penthouse|rear|side|upper)\b/i";

    if (preg_match($pattern, $address, $match) === 1) {
        return $match[0];
    }

    // null (not false) is the "not found" value allowed by the ?string type.
    return null;
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment