ace-dent/string_pack.php

## string_pack.php
<?php
	/**
	  * MAGICSTRINGPACKER
	  *
	  * Pack small strings or Codes (e.g. 2 letter ISO country codes), into a compact 'magic string' minimized by:
	  * 1> Running Codes into each other (eliminating matching first and last letters) 'AB'+'BA' => 'ABA' ;
	  * 2> Avoiding spaces / code break characters, by joining Codes 'AA,BB' => 'AABB';
	  * Only where 1&2 generates allowable sequences (unique, with no collisions).
	  * This is useful for matching data against a compact reference (magic string), where hashing isn't an option.
	  * E.g. Checking country codes in a spreadsheet against the list of EU countries.
	  *
	  * This software is provided 'as-is', without any express or implied warranty.
	  *
	  * @author Andrew C.E. Dent
	  *
	  * @license GPL
	  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
	  * @version 1.2.1
	  */

	// The_Beginning:
	echo "Start... \n";
	$verbose = true; // Provide useful feedback for testing. Set to false for batch runs.


	// Small strings -or codes- that will be packed
	$codes_to_pack = array(
		// Country codes for EU VAT
		"AT","BE","BG","CY","CZ","DE","DK","EE","ES",
		"FI","FR","\GB","EL","HU","IE","IT","LT","LU", // VAT codes. Greece uses language code EL not country code GR
		"LV","MT","NL","PL","PT","RO","SE","SI","SK",
		//"EU", // General non-domicile EU registration
		"HR", // Croatia joined EU 1st July 2013
		"UK","IM","GR", // Extra country codes for checking Shipping destination against
		"MC" // Monaco, French dependency
	);
	// To pin the most commonly occurring code to the start of a run, prefix it with a
	// backslash ('GB' > '\GB'). "\G" is not a PHP escape sequence, so the backslash stays in the string.
	// No code ends in a backslash, so nothing can be merged or joined in front of the prefixed code;
	// this hack prevents string matches and may reduce compression.
	// Backslashes are ignored for statistics and stripped out (stripslashes) at the end of the process.
	// The pinned run is not guaranteed to come first in the final string (chunks are shuffled);
	// the save condition at the end of the script is what checks for a leading 'GB'.
	// A leading common code is useful in practice when matching data against the magic string, for early matching and escape.


	// Strings that are allowable in resulting compact form (unused Codes)
	// These codes may be generated when we remove the break character between two input codes
	// e.g. AA+BB > AABB, as 'AB' substring does not collide with another valid code.
	$codes_allowed = array(
		// Subset of codes -not- in use from ISO 3166-1:2006.
		// Selection based on EU countries. Some commented out for smaller array and speed gain.
		// "AB","AC","AH","AJ","AK","AP",//"AV","AY",
		"BC","BK","BL","BP","BQ","BU",//"BX",
		"CB","CE","CJ","CP","CQ","CT",//"CW",
		"EA","EB","ED","EF","EI","EJ","EK",/*"EL" - Greece*/ "EM","EN","EO","EP","EQ",//"EV","EW","EX","EY","EZ",
		"GC","GJ","GK","GO",//"GV","GX","GZ",
		"IA","IB","IC","IF","IG","IH","II","IJ","IK","IP","IU",//"IV","IW","IX","IY","IZ",
		"KA","KB","KC","KD","KF","KJ","KK","KL","KO","KQ","KS","KT","KU",//"KV","KX",
		"LD","LE","LF","LG","LH","LJ","LL","LM","LN","LO","LP","LQ",//"LW","LX","LZ",
		"MB","MF","MI","MJ",
		"OA","OB","OC","OD","OE","OF","OG","OH","OI","OJ","OK","OL","ON","OP","OQ","OR","OS","OT","OU",//"OV","OW","OX","OY","OZ",
		//"QB", "QC", "QD", "QE", "QF", "QG", "QH", "QI", "QJ", "QK", "QL"
		"RA","RB","RC","RD","RF","RG","RH","RI","RJ","RK","RL","RM","RN","RP","RQ","RR","RT",//"RV","RX","RY","RZ",
		"SF","SP","SQ","SS","SU",//"SW","SX",
		"TA","TB","TE","TI","TQ","TS","TU",//"TX","TY",
		// NOTE(review): "UK" in the next row is also listed in $codes_to_pack.
		"UB","UC","UD","UE","UF","UH","UI","UJ","UK","UL","UN","UO","UP","UQ","UR","UT","UU",//"UV","UW","UX",
		"VB","VD","VF","VH","VJ","VK","VL","VM","VO","VP","VQ","VR","VS","VT",//"VV","VW","VX","VY","VZ",
		"YA","YB","YC","YD","YF","YG","YH","YI","YJ","YK","YL","YM","YN","YO","YP","YQ","YR","YS",//"YV","YW","YX","YY","YZ",
		"ZB","ZC","ZD","ZE","ZF","ZG","ZH","ZI","ZJ","ZK","ZL","ZN","ZO","ZP","ZQ","ZR","ZS","ZT","ZU",//"ZV","ZX","ZY"

		// Include the following historically 'reserved' and private codes, as we know they won't appear in our data set.
		// Some commented out for smaller array and speed gain.
		//"AA", "OO"
		"CS","TP","YU","EU"
		//"QM", "QN", "QO", "QP", "QQ", "QR", "QS", "QT", "QU", // "QV", "QW", "QX", "QY", "QZ",
		//"XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH", "XI", "XJ", "XK", "XL", "XM", "XN",
		//"XO", "XP", "XQ", "XR", "XS", "XT", "XU", // "XV", "XW", "XX", "XY", "XZ",
		//"ZZ"
	);

	// Add in the Codes to Pack as Allowable duplicates in the final magic string.
	// (Remove following line if each code can only occur once).
	// array_merge() appends without de-duplicating, so a code present in both lists appears twice here.
	$codes_allowed = array_merge($codes_allowed, $codes_to_pack);


	// Benchmarking - record the size of the naive "."-delimited list as the baseline for the final report
	$total_codes = count($codes_to_pack);
	$start_codes = implode(".", $codes_to_pack);
	$start_length = strlen(stripslashes($start_codes)); // Backslashes are stripped from the final string, so they are not counted
	if ($verbose) {
		echo "\n", $total_codes, " codes using ", $start_length, " chars (~", round(8*$start_length/$total_codes,1), " bits/code) -> \n";
		echo $start_codes, "\n";
	}

	//	@TODO: We should scan and clean up arrays to remove duplicates.

	// Prune the Allowed list - faster later.
	// An Allowed code is only useful if its last letter is the first letter of some code to pack;
	// any other entry is blanked in place (set to null, array keys kept).
	if ($verbose) echo "\nReduce list of Allowed Codes that cannot be matched: ";
	$leading_letters = array(); // First letter of every code to pack, collected once
	foreach ($codes_to_pack as $packed_code) {
		$leading_letters[] = substr($packed_code,0,1);
	}
	foreach ($codes_allowed as $allowed_key => $allowed_code) {
		if (in_array(substr($allowed_code,-1,1), $leading_letters)) continue; // Still reachable - keep it
		if ($verbose) echo $allowed_code. " - ";
		$codes_allowed[$allowed_key] = null;
	}
	unset($leading_letters, $packed_code, $allowed_key, $allowed_code); // Drop the temporaries


	// Remove Codes that cannot be merged in Round 1- faster matching later
	/* Currently this approach creates local areas of low compression in the final string.
	It could be possible to transform these strings, substituting characters.
	Then requires code overhead to decode.
	*/
	// A code stays in play when its first letter equals the last letter of some code, or its last
	// letter equals the first letter of some code. Each code is also compared with itself, so a
	// code whose first and last letters are equal (e.g. 'EE') is always kept.
	// Codes without any such partner are copied to $removed_codes and blanked (null) in $codes_to_pack.

	if ($verbose) echo "\n\nIgnore codes with no matching letters for first round: ";
	$removed_codes = array (); // We will store removed codes to add back to the final output
	foreach ($codes_to_pack as &$code) {
		$first = substr($code,0,1) ; // First letter of string
		$last = substr($code,-1,1) ; // Last letter of string
		$check = false;
		foreach ($codes_to_pack as $code_check) {
			// Entries blanked earlier in this sweep are out of play. Skipping them also avoids
			// passing null to substr(), which is deprecated since PHP 8.1.
			if ($code_check === null) continue;
			if ($first == substr($code_check,-1,1) || $last == substr($code_check,0,1)) {
				$check = true; // The code was matched
				break; // One partner is enough
			}
		}
		unset($code_check);
		if ($check == false) {
			if ($verbose) echo $code. " (". $first . $last . ") - ";
			$removed_codes[] = $code;
			$code = null;
		}
	}
	unset($code); // Break the reference with the last element
	if ($verbose) echo "\n";

	// Round 1 - Merge strings by matching (and combining) first and last letters
	/* Current algorithm is quite lazy. A 'greedy' approach may yield higher local compression:
	e.g. 	IESEE > IE ES SE EE > 10bits/code
			SIEESELV > SI IE EE ES SE EL LV > 9.14bits/code
			BGBEESIMCZ > BG GB BE EE ES SI IM MC CZ > 8.89bits/code

	 In testing, where the numbers of codes to compact are small, a greedy approach yields more
	 orphan codes that are uncompacted, balancing out any gains of the few long runs. Every merge saves 2 bytes,
	 whereas in Round 2, each join only saves 1 byte. It seems to be a better strategy to be lazy...
	*/
	// Five sweeps. In each sweep the chunks are shuffled, every chunk is paired with the first
	// other chunk it overlaps by one letter, and unpaired chunks are carried over unchanged.

	$input = $codes_to_pack;
	for ($i = 1; $i <= 5; $i++) {
		shuffle($input); // Randomize order of strings in each sweep
		$result = array();

		foreach ($input as &$code) {
			// null = removed before Round 1 or already consumed in this sweep.
			// (Also keeps null away from substr(), deprecated since PHP 8.1.)
			if ($code === null) continue;
			$first = substr($code,0,1) ; // First letter of string
			$last = substr($code,-1,1) ; // Last letter of string
			foreach ($input as &$code_check) {
				// Skip consumed slots and identical strings (this includes the chunk itself)
				if ($code_check === null || $code_check === $code) continue;
				if ($last === substr($code_check,0,1)) {
					// Check this match first, to prefer 'GB+BG>GBG' vs 'GB+BG>BGB'
					$result[] = $code . substr($code_check,1);
				} else if ($first === substr($code_check,-1,1)) {
					$result[] = $code_check . substr($code,1);
				} else {
					continue; // No shared letter - try the next candidate
				}
				// Both chunks are now part of the merged string
				$code = null;
				$code_check = null;
				break;
			}
			unset($code_check); // Break the reference with the last element
		}
		unset($code); // Break the reference with the last element

		// Tidy up - carry over any chunks that weren't matched for another pass.
		// Iterated by value so that no reference is left dangling after the loop.
		foreach ($input as $leftover) {
			if ($leftover != null) $result[] = $leftover;
		}
		unset($leftover);

		if ($verbose) {
			echo "\n Run-".$i." : ";
			echo implode(".", $result);
		}

		$input = $result;
		unset($result);
	}

	// Add back in the unmatching codes removed at the start.
	// The null placeholders left in $codes_to_pack are dropped here, so later passes over the
	// list only ever see real strings.
	$restored = array();
	foreach ($codes_to_pack as $kept) {
		if ($kept !== null) $restored[] = $kept;
	}
	$codes_to_pack = array_merge($restored, $removed_codes); // Fix the original array
	$result = array_merge($input, $removed_codes); // Working array
	unset($restored, $kept);
	unset($input);
	unset($removed_codes);


	if ($verbose) echo "\n *** \n";
	// Last round - remove space between codes, if consecutive letters are in Allowed list
	// Twenty sweeps. Two chunks are concatenated when <last letter of the first><first letter of
	// the second> is an entry of $codes_allowed; unjoined chunks are carried over unchanged.

	$input = $result;

	for ($i = 1; $i <= 20; $i++) {
		shuffle($input);
		$result = array();

		foreach ($input as &$code) {
			// null = already consumed in this sweep (also keeps null away from substr())
			if ($code === null) continue;
			$last = substr($code,-1,1) ; // Last letter of string
			foreach ($input as &$code_check) {
				// Skip consumed slots and identical strings (this includes the chunk itself)
				if ($code_check === null || $code_check === $code) continue;
				$combo = $last . substr($code_check,0,1) ; // Letter pair the join would create
				// Strict lookup: $codes_allowed contains null placeholders that must never match
				if (!in_array($combo, $codes_allowed, true)) continue;

				// @TODO :
				// Test if joining of the two strings duplicates a code
				// that may be replaced elsewhere (although unlikely with few codes)
				// Find the original, existing string e.g. ES
					// If string length = 2, delete ES -> -
					// Check start of strings ESAA -> -SAA
					// Check end of strings AAES -> AAE-
					// Else we cannot remove AESA -> AESA
				// Only bother to test for one letter, as two is very unlikely for code length=2...

				// @TODO :
				// Redundant check to see if first / last letter is equal then compress instead of concatenate
				// IE + EE > IEE ... or AA + AA = AA
				// This shouldn't happen after the 1st pass!... Check we don't overly compress e.g. 'EE'

				// Finally combine the two strings
				$result[] = $code . $code_check; // Concatenate strings
				$code = null;
				$code_check = null;
				// Stop scanning candidates for this chunk: it has been consumed. Continuing with the
				// old $last would move later candidates into $result without joining them.
				break;
			}
			unset($code_check); // Break the reference with the last element
		}
		unset($code); // Break the reference with the last element

		// Tidy up - carry over any chunks that weren't matched for another pass.
		// Iterated by value so that no reference is left dangling after the loop.
		foreach ($input as $leftover) {
			if ($leftover != null) $result[] = $leftover;
		}
		unset($leftover);

		if ($verbose) {
			echo "\n Run-".$i." : ";
			echo implode(".", $result);
		}

		$input = $result;
		unset($result);
	}


	// Finish up
	$result = $input;
	unset($input);
	if ($verbose) echo "\n";


	// Remaining chunks are delimited with "."; the backslash prefix is stripped here
	$magic_string = stripslashes( implode(".", $result) ); // Remove any slashes
	unset($result);
	$magic_length = strlen($magic_string);


	// Test for duplicates
	/*
	Currently we don't do anything about this. Best is to tackle at the point repeated strings are generated.
	Also edge case of repeated runs of letters,
		e.g. AA+AB BA+AA > AAB BAA, AAB+BAA > AABAA
	*/
	// Slides a 2-character window over the magic string and sorts every window into
	// "is a code to pack" or "inter-code", then prints both lists and the size statistics.

	if ($verbose)  {
		// Normalise the reference list once instead of once per window: drop null placeholders
		// (stripslashes(null) is deprecated since PHP 8.1) and strip the backslash prefix.
		$known_codes = array();
		foreach ($codes_to_pack as $code_check) {
			if ($code_check === null) continue;
			$known_codes[] = stripslashes($code_check);
		}
		unset($code_check);

		$result = array (); // Array to store codes for duplication check
		$result_inter_codes = array (); // Array to store 'inter-codes' (adjacent non-coding letters)
		// TODO - This currently only works for fixed 2 character codes
		// Should be rewritten for variable input code lengths...
		for ($i = 1; $i < $magic_length; $i++) {
			$code = substr($magic_string,$i-1,2) ; // Grab possible 2 character code
			$check = false;
			foreach ($known_codes as $known_code) {
				if ($known_code === $code) {
					$check = true; // The code was matched
					$result[] = $code; // Logged once per matching list entry
				}
			}
			if ($check == false) $result_inter_codes[] = $code; // Log inter-code
		}
		unset($known_codes, $known_code);
		asort($result);
		asort($result_inter_codes);
		// If the number of codes found differs from the number we started with,
		// a code is repeated (or missing) in the magic string...
		if (count($result) != $total_codes) echo "\nWARNING: Possible duplicate codes... ";
		echo "\nCodes, from adjacent letters: \n ";
		echo implode("-", $result);
		echo " ". count($result)." codes \n\n";
		echo "Non-matching codes, from adjacent letters: \n ";
		echo implode("-", $result_inter_codes);
		echo " ". count($result_inter_codes)." codes \n\n";

		// Size report. The "Compressed" figure is the share of characters saved relative to the
		// "."-delimited starting string.
		echo $magic_string . " <- " . $magic_length . " chars (~" . round(8*$magic_length/$total_codes,1) . " bits/code).\n";
		echo "Compressed: " . round(($start_length - $magic_length) / $start_length * 100) . "% of original. \n";
		echo round((800*$magic_length)/($total_codes*10),0) . "% Theoretical limit (10 bits/code). \n";
		// Theoretical limit applies to country codes
	}

	// Set here conditions to Save the packed magic string generated
	// For our data, we want the most common code first 'GB' for early matching and escape.
	// Saved when shorter than 49 chars, or shorter than 50 chars and starting with 'GB'.
	if ($magic_length < 49 || ($magic_length < 50 && substr($magic_string,0,2) == "GB")) {

		$filename = 'magic_string.txt';
		$somecontent = "\n" . $magic_length . " - ". $magic_string;

		// The file must already exist and be writable; it is never created here.
		if (!is_writable($filename)) {
			echo "The file $filename is not writable";
		} else {
			// Append mode: the file pointer is at the bottom of the file,
			// so $somecontent is added after whatever is already there.
			$handle = fopen($filename, 'a');
			if (!$handle) {
				echo "Cannot open file ($filename)";
				exit;
			}

			// Write $somecontent to our opened file.
			if (fwrite($handle, $somecontent) === FALSE) {
				echo "Cannot write to file ($filename)";
				exit;
			}

			echo "Success, wrote ( $somecontent ) to file ($filename)";

			fclose($handle);
		}

	}

	echo " ...End \n";

	// Cleanup: release every variable this script created.
	// The variables are named explicitly instead of walking $GLOBALS: since PHP 7 the old
	// "unset($$keys[$t])" is parsed as "unset(${$keys}[$t])" and removes nothing, and the old
	// loop condition read past the end of the key list.
	// unset() silently ignores names that are not defined.
	unset(
		$verbose, $codes_to_pack, $codes_allowed,
		$start_codes, $start_length, $total_codes,
		$code, $code_check, $code_available, $combo, $check,
		$first, $last, $first_check, $last_check,
		$removed_codes, $input, $result, $result_inter_codes, $i,
		$magic_string, $magic_length,
		$filename, $somecontent, $handle,
		$keys, $k, $v, $t
	);

?>
	<?php
	/**
	* MAGICSTRINGPACKER
	*
	* Pack small strings or Codes (e.g. 2 letter ISO country codes), into a compact 'magic string' minimized by:
	* 1> Running Codes into each other (eliminating matching first and last letters) 'AB'+'BA' => 'ABA' ;
	* 2> Avoiding spaces / code break characters, by joining Codes 'AA,BB' => 'AABB';
	* Only where 1&2 generates allowable sequences (unique, with no collisions).
	* This is useful for matching data against a compact reference (magic string), where hashing isn't an option.
	* E.g. Checking country codes in a spreadsheet against the list of EU countries.
	*
	* This software is provided 'as-is', without any express or implied warranty.
	*
	* @author Andrew C.E. Dent
	*
	* @license GPL
	* @license http://opensource.org/licenses/gpl-license.php GNU Public License
	* @version 1.2.1
	*/

	// The_Beginning:
	echo "Start... \n";
	$verbose = true; // Provide useful feedback for testing. Set to false for batch runs.


	// Small strings -or codes- that will be packed
	$codes_to_pack = array(
	// Country codes for EU VAT
	"AT","BE","BG","CY","CZ","DE","DK","EE","ES",
	"FI","FR","\GB","EL","HU","IE","IT","LT","LU", // VAT codes. Greece uses language code EL not country code GR
	"LV","MT","NL","PL","PT","RO","SE","SI","SK",
	//"EU", // General non-domicile EU registration
	"HR", // Croatia joined EU 1st July 2013
	"UK","IM","GR", // Extra country codes for checking Shipping destination against
	"MC" // Monaco, French dependency
	);
	// To pin the most commonly occurring code to the start of a run, prefix it with a
	// backslash ('GB' > '\GB'). "\G" is not a PHP escape sequence, so the backslash stays in the string.
	// No code ends in a backslash, so nothing can be merged or joined in front of the prefixed code;
	// this hack prevents string matches and may reduce compression.
	// Backslashes are ignored for statistics and stripped out (stripslashes) at the end of the process.
	// The pinned run is not guaranteed to come first in the final string (chunks are shuffled);
	// the save condition at the end of the script is what checks for a leading 'GB'.
	// A leading common code is useful in practice when matching data against the magic string, for early matching and escape.


	// Strings that are allowable in resulting compact form (unused Codes)
	// These codes may be generated when we remove the break character between two input codes
	// e.g. AA+BB > AABB, as 'AB' substring does not collide with another valid code.
	$codes_allowed = array(
		// Subset of codes -not- in use from ISO 3166-1:2006.
		// Selection based on EU countries. Some commented out for smaller array and speed gain.
		// "AB","AC","AH","AJ","AK","AP",//"AV","AY",
		"BC","BK","BL","BP","BQ","BU",//"BX",
		"CB","CE","CJ","CP","CQ","CT",//"CW",
		// "EL" is in use (Greece), so it is commented out of this row with a block comment
		"EA","EB","ED","EF","EI","EJ","EK",/*"EL" - Greece*/ "EM","EN","EO","EP","EQ",//"EV","EW","EX","EY","EZ",
		"GC","GJ","GK","GO",//"GV","GX","GZ",
		"IA","IB","IC","IF","IG","IH","II","IJ","IK","IP","IU",//"IV","IW","IX","IY","IZ",
		"KA","KB","KC","KD","KF","KJ","KK","KL","KO","KQ","KS","KT","KU",//"KV","KX",
		"LD","LE","LF","LG","LH","LJ","LL","LM","LN","LO","LP","LQ",//"LW","LX","LZ",
		"MB","MF","MI","MJ",
		"OA","OB","OC","OD","OE","OF","OG","OH","OI","OJ","OK","OL","ON","OP","OQ","OR","OS","OT","OU",//"OV","OW","OX","OY","OZ",
		//"QB", "QC", "QD", "QE", "QF", "QG", "QH", "QI", "QJ", "QK", "QL"
		"RA","RB","RC","RD","RF","RG","RH","RI","RJ","RK","RL","RM","RN","RP","RQ","RR","RT",//"RV","RX","RY","RZ",
		"SF","SP","SQ","SS","SU",//"SW","SX",
		"TA","TB","TE","TI","TQ","TS","TU",//"TX","TY",
		// NOTE(review): "UK" in the next row is also listed in $codes_to_pack.
		"UB","UC","UD","UE","UF","UH","UI","UJ","UK","UL","UN","UO","UP","UQ","UR","UT","UU",//"UV","UW","UX",
		"VB","VD","VF","VH","VJ","VK","VL","VM","VO","VP","VQ","VR","VS","VT",//"VV","VW","VX","VY","VZ",
		"YA","YB","YC","YD","YF","YG","YH","YI","YJ","YK","YL","YM","YN","YO","YP","YQ","YR","YS",//"YV","YW","YX","YY","YZ",
		"ZB","ZC","ZD","ZE","ZF","ZG","ZH","ZI","ZJ","ZK","ZL","ZN","ZO","ZP","ZQ","ZR","ZS","ZT","ZU",//"ZV","ZX","ZY"

		// Include the following historically 'reserved' and private codes, as we know they won't appear in our data set.
		// Some commented out for smaller array and speed gain.
		//"AA", "OO"
		"CS","TP","YU","EU"
		//"QM", "QN", "QO", "QP", "QQ", "QR", "QS", "QT", "QU", // "QV", "QW", "QX", "QY", "QZ",
		//"XA", "XB", "XC", "XD", "XE", "XF", "XG", "XH", "XI", "XJ", "XK", "XL", "XM", "XN",
		//"XO", "XP", "XQ", "XR", "XS", "XT", "XU", // "XV", "XW", "XX", "XY", "XZ",
		//"ZZ"
	);

	// Add in the Codes to Pack as Allowable duplicates in the final magic string.
	// (Remove following line if each code can only occur once).
	// array_merge() appends without de-duplicating, so a code present in both lists appears twice here.
	$codes_allowed = array_merge($codes_allowed, $codes_to_pack);


	// Benchmarking - record the size of the naive "."-delimited list as the baseline for the final report
	$total_codes = count($codes_to_pack);
	$start_codes = implode(".", $codes_to_pack);
	$start_length = strlen(stripslashes($start_codes)); // Backslashes are stripped from the final string, so they are not counted
	if ($verbose) {
		echo "\n", $total_codes, " codes using ", $start_length, " chars (~", round(8*$start_length/$total_codes,1), " bits/code) -> \n";
		echo $start_codes, "\n";
	}

	// @TODO: We should scan and clean up arrays to remove duplicates.

	// Prune the Allowed list - faster later.
	// An Allowed code is only useful if its last letter is the first letter of some code to pack;
	// any other entry is blanked in place (set to null, array keys kept).
	if ($verbose) echo "\nReduce list of Allowed Codes that cannot be matched: ";
	$leading_letters = array(); // First letter of every code to pack, collected once
	foreach ($codes_to_pack as $packed_code) {
		$leading_letters[] = substr($packed_code,0,1);
	}
	foreach ($codes_allowed as $allowed_key => $allowed_code) {
		if (in_array(substr($allowed_code,-1,1), $leading_letters)) continue; // Still reachable - keep it
		if ($verbose) echo $allowed_code. " - ";
		$codes_allowed[$allowed_key] = null;
	}
	unset($leading_letters, $packed_code, $allowed_key, $allowed_code); // Drop the temporaries


	// Remove Codes that cannot be merged in Round 1- faster matching later
	/* Currently this approach creates local areas of low compression in the final string.
	It could be possible to transform these strings, substituting characters.
	Then requires code overhead to decode.
	*/
	// A code stays in play when its first letter equals the last letter of some code, or its last
	// letter equals the first letter of some code. Each code is also compared with itself, so a
	// code whose first and last letters are equal (e.g. 'EE') is always kept.
	// Codes without any such partner are copied to $removed_codes and blanked (null) in $codes_to_pack.

	if ($verbose) echo "\n\nIgnore codes with no matching letters for first round: ";
	$removed_codes = array (); // We will store removed codes to add back to the final output
	foreach ($codes_to_pack as &$code) {
		$first = substr($code,0,1) ; // First letter of string
		$last = substr($code,-1,1) ; // Last letter of string
		$check = false;
		foreach ($codes_to_pack as $code_check) {
			// Entries blanked earlier in this sweep are out of play. Skipping them also avoids
			// passing null to substr(), which is deprecated since PHP 8.1.
			if ($code_check === null) continue;
			if ($first == substr($code_check,-1,1) || $last == substr($code_check,0,1)) {
				$check = true; // The code was matched
				break; // One partner is enough
			}
		}
		unset($code_check);
		if ($check == false) {
			if ($verbose) echo $code. " (". $first . $last . ") - ";
			$removed_codes[] = $code;
			$code = null;
		}
	}
	unset($code); // Break the reference with the last element
	if ($verbose) echo "\n";

	// Round 1 - Merge strings by matching (and combining) first and last letters
	/* Current algorithm is quite lazy. A 'greedy' approach may yield higher local compression:
	e.g. 	IESEE > IE ES SE EE > 10bits/code
			SIEESELV > SI IE EE ES SE EL LV > 9.14bits/code
			BGBEESIMCZ > BG GB BE EE ES SI IM MC CZ > 8.89bits/code

	 In testing, where the numbers of codes to compact are small, a greedy approach yields more
	 orphan codes that are uncompacted, balancing out any gains of the few long runs. Every merge saves 2 bytes,
	 whereas in Round 2, each join only saves 1 byte. It seems to be a better strategy to be lazy...
	*/
	// Five sweeps. In each sweep the chunks are shuffled, every chunk is paired with the first
	// other chunk it overlaps by one letter, and unpaired chunks are carried over unchanged.

	$input = $codes_to_pack;
	for ($i = 1; $i <= 5; $i++) {
		shuffle($input); // Randomize order of strings in each sweep
		$result = array();

		foreach ($input as &$code) {
			// null = removed before Round 1 or already consumed in this sweep.
			// (Also keeps null away from substr(), deprecated since PHP 8.1.)
			if ($code === null) continue;
			$first = substr($code,0,1) ; // First letter of string
			$last = substr($code,-1,1) ; // Last letter of string
			foreach ($input as &$code_check) {
				// Skip consumed slots and identical strings (this includes the chunk itself)
				if ($code_check === null || $code_check === $code) continue;
				if ($last === substr($code_check,0,1)) {
					// Check this match first, to prefer 'GB+BG>GBG' vs 'GB+BG>BGB'
					$result[] = $code . substr($code_check,1);
				} else if ($first === substr($code_check,-1,1)) {
					$result[] = $code_check . substr($code,1);
				} else {
					continue; // No shared letter - try the next candidate
				}
				// Both chunks are now part of the merged string
				$code = null;
				$code_check = null;
				break;
			}
			unset($code_check); // Break the reference with the last element
		}
		unset($code); // Break the reference with the last element

		// Tidy up - carry over any chunks that weren't matched for another pass.
		// Iterated by value so that no reference is left dangling after the loop.
		foreach ($input as $leftover) {
			if ($leftover != null) $result[] = $leftover;
		}
		unset($leftover);

		if ($verbose) {
			echo "\n Run-".$i." : ";
			echo implode(".", $result);
		}

		$input = $result;
		unset($result);
	}

	// Add back in the unmatching codes removed at the start.
	// The null placeholders left in $codes_to_pack are dropped here, so later passes over the
	// list only ever see real strings.
	$restored = array();
	foreach ($codes_to_pack as $kept) {
		if ($kept !== null) $restored[] = $kept;
	}
	$codes_to_pack = array_merge($restored, $removed_codes); // Fix the original array
	$result = array_merge($input, $removed_codes); // Working array
	unset($restored, $kept);
	unset($input);
	unset($removed_codes);


	if ($verbose) echo "\n *** \n";
	// Last round - remove space between codes, if consecutive letters are in Allowed list
	// Twenty sweeps. Two chunks are concatenated when <last letter of the first><first letter of
	// the second> is an entry of $codes_allowed; unjoined chunks are carried over unchanged.

	$input = $result;

	for ($i = 1; $i <= 20; $i++) {
		shuffle($input);
		$result = array();

		foreach ($input as &$code) {
			// null = already consumed in this sweep (also keeps null away from substr())
			if ($code === null) continue;
			$last = substr($code,-1,1) ; // Last letter of string
			foreach ($input as &$code_check) {
				// Skip consumed slots and identical strings (this includes the chunk itself)
				if ($code_check === null || $code_check === $code) continue;
				$combo = $last . substr($code_check,0,1) ; // Letter pair the join would create
				// Strict lookup: $codes_allowed contains null placeholders that must never match
				if (!in_array($combo, $codes_allowed, true)) continue;

				// @TODO :
				// Test if joining of the two strings duplicates a code
				// that may be replaced elsewhere (although unlikely with few codes)
				// Find the original, existing string e.g. ES
					// If string length = 2, delete ES -> -
					// Check start of strings ESAA -> -SAA
					// Check end of strings AAES -> AAE-
					// Else we cannot remove AESA -> AESA
				// Only bother to test for one letter, as two is very unlikely for code length=2...

				// @TODO :
				// Redundant check to see if first / last letter is equal then compress instead of concatenate
				// IE + EE > IEE ... or AA + AA = AA
				// This shouldn't happen after the 1st pass!... Check we don't overly compress e.g. 'EE'

				// Finally combine the two strings
				$result[] = $code . $code_check; // Concatenate strings
				$code = null;
				$code_check = null;
				// Stop scanning candidates for this chunk: it has been consumed. Continuing with the
				// old $last would move later candidates into $result without joining them.
				break;
			}
			unset($code_check); // Break the reference with the last element
		}
		unset($code); // Break the reference with the last element

		// Tidy up - carry over any chunks that weren't matched for another pass.
		// Iterated by value so that no reference is left dangling after the loop.
		foreach ($input as $leftover) {
			if ($leftover != null) $result[] = $leftover;
		}
		unset($leftover);

		if ($verbose) {
			echo "\n Run-".$i." : ";
			echo implode(".", $result);
		}

		$input = $result;
		unset($result);
	}


	// Finish up
	$result = $input;
	unset($input);
	if ($verbose) echo "\n";


	// Remaining chunks are delimited with "."; the backslash prefix is stripped here
	$magic_string = stripslashes( implode(".", $result) ); // Remove any slashes
	unset($result);
	$magic_length = strlen($magic_string);


	// Test for duplicates
	/*
	Currently we don't do anything about this. Best is to tackle at the point repeated strings are generated.
	Also edge case of repeated runs of letters,
		e.g. AA+AB BA+AA > AAB BAA, AAB+BAA > AABAA
	*/
	// Slides a 2-character window over the magic string and sorts every window into
	// "is a code to pack" or "inter-code", then prints both lists and the size statistics.

	if ($verbose)  {
		// Normalise the reference list once instead of once per window: drop null placeholders
		// (stripslashes(null) is deprecated since PHP 8.1) and strip the backslash prefix.
		$known_codes = array();
		foreach ($codes_to_pack as $code_check) {
			if ($code_check === null) continue;
			$known_codes[] = stripslashes($code_check);
		}
		unset($code_check);

		$result = array (); // Array to store codes for duplication check
		$result_inter_codes = array (); // Array to store 'inter-codes' (adjacent non-coding letters)
		// TODO - This currently only works for fixed 2 character codes
		// Should be rewritten for variable input code lengths...
		for ($i = 1; $i < $magic_length; $i++) {
			$code = substr($magic_string,$i-1,2) ; // Grab possible 2 character code
			$check = false;
			foreach ($known_codes as $known_code) {
				if ($known_code === $code) {
					$check = true; // The code was matched
					$result[] = $code; // Logged once per matching list entry
				}
			}
			if ($check == false) $result_inter_codes[] = $code; // Log inter-code
		}
		unset($known_codes, $known_code);
		asort($result);
		asort($result_inter_codes);
		// If the number of codes found differs from the number we started with,
		// a code is repeated (or missing) in the magic string...
		if (count($result) != $total_codes) echo "\nWARNING: Possible duplicate codes... ";
		echo "\nCodes, from adjacent letters: \n ";
		echo implode("-", $result);
		echo " ". count($result)." codes \n\n";
		echo "Non-matching codes, from adjacent letters: \n ";
		echo implode("-", $result_inter_codes);
		echo " ". count($result_inter_codes)." codes \n\n";

		// Size report. The "Compressed" figure is the share of characters saved relative to the
		// "."-delimited starting string.
		echo $magic_string . " <- " . $magic_length . " chars (~" . round(8*$magic_length/$total_codes,1) . " bits/code).\n";
		echo "Compressed: " . round(($start_length - $magic_length) / $start_length * 100) . "% of original. \n";
		echo round((800*$magic_length)/($total_codes*10),0) . "% Theoretical limit (10 bits/code). \n";
		// Theoretical limit applies to country codes
	}

	// Set here conditions to Save the packed magic string generated
	// For our data, we want the most common code first 'GB' for early matching and escape.
	// Saved when shorter than 49 chars, or shorter than 50 chars and starting with 'GB'.
	if ($magic_length < 49 || ($magic_length < 50 && substr($magic_string,0,2) == "GB")) {

		$filename = 'magic_string.txt';
		$somecontent = "\n" . $magic_length . " - ". $magic_string;

		// The file must already exist and be writable; it is never created here.
		if (!is_writable($filename)) {
			echo "The file $filename is not writable";
		} else {
			// Append mode: the file pointer is at the bottom of the file,
			// so $somecontent is added after whatever is already there.
			$handle = fopen($filename, 'a');
			if (!$handle) {
				echo "Cannot open file ($filename)";
				exit;
			}

			// Write $somecontent to our opened file.
			if (fwrite($handle, $somecontent) === FALSE) {
				echo "Cannot write to file ($filename)";
				exit;
			}

			echo "Success, wrote ( $somecontent ) to file ($filename)";

			fclose($handle);
		}

	}

	echo " ...End \n";

	// Cleanup: release every variable this script created.
	// The variables are named explicitly instead of walking $GLOBALS: since PHP 7 the old
	// "unset($$keys[$t])" is parsed as "unset(${$keys}[$t])" and removes nothing, and the old
	// loop condition read past the end of the key list.
	// unset() silently ignores names that are not defined.
	unset(
		$verbose, $codes_to_pack, $codes_allowed,
		$start_codes, $start_length, $total_codes,
		$code, $code_check, $code_available, $combo, $check,
		$first, $last, $first_check, $last_check,
		$removed_codes, $input, $result, $result_inter_codes, $i,
		$magic_string, $magic_length,
		$filename, $somecontent, $handle,
		$keys, $k, $v, $t
	);

	?>