Last active
May 14, 2019 21:43
-
-
Save kyletaylored/60ac3b6aacf298d4f23ced1331b351e2 to your computer and use it in GitHub Desktop.
Just a scratch pad for now.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// On the command line a sitemap file must be passed as the first argument;
// bail out with a usage hint when it is missing.
if (PHP_SAPI === 'cli' && empty($argv[1])) {
    // dd() is not defined anywhere in this script, so calling it would end in a
    // fatal "undefined function" error instead of showing the usage message.
    // Write the hint to STDERR and exit with a non-zero status instead.
    // The example file name is .xml because the input is parsed with SimpleXML.
    fwrite(STDERR, "Pass a file name as an argument. \n\n ./index.php sitemap.xml\n");
    exit(1);
}
//*********** Sitemap Processor Class *********** | |
/**
 * Turns a sitemap XML file into a nested tree of `{name, children: [...]}`
 * nodes and writes that tree to "tree-xml.json".
 *
 * Each URL is split into its host followed by its path segments; URLs that
 * share a prefix share the corresponding nodes in the tree.
 */
class SitemapProcessor {
    /** @var string[] Normalised URLs extracted from the sitemap. */
    private $paths;
    /** @var mixed Not used by any method of this class. */
    private $json;
    /** @var SimpleXMLElement Parsed sitemap document. */
    private $xml;
    /** @var stdClass Tree built from $paths. */
    private $processed;

    /**
     * Loads the sitemap, builds the tree and writes it to disk.
     * @param string $file Path (or URL) of the sitemap XML file.
     */
    public function __construct($file) {
        $this->loadXML($file);
        $this->paths = $this->cleanUrls($this->xml);
        $this->processed = $this->processUrls($this->paths);
        $this->writeJson($this->processed);
    }

    /**
     * Parses the sitemap file into $this->xml.
     * Terminates the script with an error message when the file cannot be parsed.
     * @param string $file Path to file.
     */
    private function loadXML($file) {
        $xml = simplexml_load_file($file);
        // Compare against false explicitly: an element without children is
        // falsy, so a truthiness test would also reject a valid empty sitemap.
        if ($xml === false) {
            die("Error: Cannot create object");
        }
        $this->xml = $xml;
    }

    /**
     * Writes the given value as JSON to "tree-xml.json" in the working directory.
     * @param mixed $obj Value to encode.
     * @throws RuntimeException When encoding or writing fails.
     */
    private function writeJson($obj) {
        $json = json_encode($obj);
        if ($json === false) {
            throw new RuntimeException('Unable to encode sitemap tree as JSON: ' . json_last_error_msg());
        }
        if (file_put_contents("tree-xml.json", $json) === false) {
            throw new RuntimeException('Unable to write tree-xml.json');
        }
    }

    /**
     * Sends the given value to the client as a JSON response.
     * @param mixed $obj Value to encode.
     */
    public function serveJson($obj) {
        header('Content-type: application/json');
        print json_encode($obj);
    }

    /**
     * Returns the length of the longest string in the list.
     * Despite the method name this measures string length (in bytes), not nesting depth.
     * @param array $arr List of strings.
     * @return int Length of longest array item, or 0 for an empty list.
     */
    public function getMaxDepth(array $arr) {
        // max() cannot be called on an empty array.
        if (empty($arr)) {
            return 0;
        }
        return max(array_map('strlen', $arr));
    }

    /**
     * Cleans list of URLs in sitemap file, returns list as array.
     *
     * Each <loc> value is trimmed, lower-cased and stripped of trailing
     * slashes, then rebuilt from scheme, host and path only (port, query
     * string and fragment are dropped). Entries without a scheme or host are
     * skipped.
     * @param SimpleXMLElement $xml Sitemap XML file.
     * @return array List of cleaned URLs.
     */
    public function cleanUrls(SimpleXMLElement $xml) {
        $paths = [];
        foreach ($xml as $sxe) {
            // Trim surrounding whitespace and any trailing slashes.
            $loc = rtrim(strtolower(trim((string) $sxe->loc)), '/');
            $url = parse_url($loc);
            // parse_url() returns false on seriously malformed input and omits
            // components that are absent; skip anything that is not absolute.
            if (!is_array($url) || empty($url['scheme']) || empty($url['host'])) {
                continue;
            }
            $path = isset($url['path']) ? $url['path'] : "";
            $paths[] = $url['scheme'] . "://" . $url['host'] . $path;
        }
        return $paths;
    }

    /**
     * Splits URL into common components: the host followed by each path segment.
     *
     * Empty segments (host-only URLs, doubled slashes) are dropped. Repeated
     * segments are kept, because "/a/b/a" and "/a/b" are different locations.
     * @param string $path A URL string.
     * @return array An array of URL parts.
     */
    public function splitUrl(string $path) {
        $parts = parse_url($path);
        if ($parts === false) {
            return [];
        }
        $host = isset($parts['host']) ? $parts['host'] : '';
        $urlPath = isset($parts['path']) ? $parts['path'] : '';
        $segments = array_merge([$host], explode('/', $urlPath));
        // Compare against '' explicitly so a segment such as "0" survives.
        return array_values(array_filter($segments, function ($segment) {
            return $segment !== '';
        }));
    }

    /**
     * Adds a child node named $part to $parent.
     *
     * When $parent is not an object yet, a new node named $part becomes the
     * parent. Otherwise the node is appended to the parent's `children` list
     * unless a child with that name already exists.
     * @param string $part Node name.
     * @param object|null $parent Parent node, modified in place.
     * @return object The parent node.
     */
    public function createChild($part, &$parent) {
        // empty() is never true for an object, so test the type instead.
        if (!is_object($parent)) {
            $parent = $this->makeNode($part);
            return $parent;
        }
        $children = $this->childrenOf($parent);
        $exists = false;
        foreach ($children as $child) {
            if (is_object($child) && isset($child->name) && (string) $child->name === (string) $part) {
                $exists = true;
                break;
            }
        }
        if (!$exists) {
            $children[] = $this->makeNode($part);
        }
        $parent->children = $children;
        return $parent;
    }

    /**
     * Recursively merges two tree nodes and returns a resulting object.
     *
     * Properties other than `children` are taken from the base object first;
     * the merge object only contributes properties the base lacks. Children
     * are matched by `name`: children with the same name are merged
     * recursively, all others are appended in order. Children that exist in
     * only one input are reused, not copied.
     * @param object $obj1 The base object
     * @param object $obj2 The merge object
     * @return object The merged object
     */
    public function mergeObjectsRecursively($obj1, $obj2) {
        $sources = [(object) $obj1, (object) $obj2];
        $merged = new stdClass();
        foreach ($sources as $source) {
            foreach (get_object_vars($source) as $key => $value) {
                if ($key !== 'children' && !property_exists($merged, $key)) {
                    $merged->$key = $value;
                }
            }
        }

        $children = [];
        $indexByName = [];
        foreach ($sources as $source) {
            foreach ($this->childrenOf($source) as $child) {
                $name = (is_object($child) && isset($child->name)) ? (string) $child->name : null;
                if ($name !== null && isset($indexByName[$name])) {
                    $position = $indexByName[$name];
                    $children[$position] = $this->mergeObjectsRecursively($children[$position], $child);
                    continue;
                }
                if ($name !== null) {
                    $indexByName[$name] = count($children);
                }
                $children[] = $child;
            }
        }
        if (!empty($children)) {
            $merged->children = $children;
        }
        return $merged;
    }

    /**
     * Builds the name/children tree for a list of URLs.
     *
     * When all URLs share one host, that host is the root node. With several
     * hosts a synthetic root named "sitemap" holds one child per host. An
     * empty list yields an empty object.
     * @param array $paths List of absolute URLs.
     * @return stdClass Root node of the tree.
     */
    public function processUrls(array $paths) {
        // Nested map keyed by segment name, so shared prefixes are found
        // without scanning sibling lists.
        $index = [];
        foreach ($paths as $path) {
            $cursor = &$index;
            foreach ($this->splitUrl($path) as $part) {
                if (!isset($cursor[$part])) {
                    $cursor[$part] = [];
                }
                $cursor = &$cursor[$part];
            }
            // Break the reference before the next URL starts from the top again.
            unset($cursor);
        }

        $hosts = $this->buildNodes($index);
        if (count($hosts) === 1) {
            $tree = $hosts[0];
        } else {
            $tree = new stdClass();
            if (!empty($hosts)) {
                $tree->name = 'sitemap';
                $tree->children = $hosts;
            }
        }
        $this->processed = $tree;
        return $tree;
    }

    /** Creates a leaf node with the given name. */
    private function makeNode($name) {
        $node = new stdClass();
        $node->name = $name;
        return $node;
    }

    /** Returns a node's children as a list, accepting a missing or single-object `children` value. */
    private function childrenOf($node) {
        if (!is_object($node) || !isset($node->children)) {
            return [];
        }
        return is_array($node->children) ? array_values($node->children) : [$node->children];
    }

    /** Converts the nested segment map built by processUrls() into a list of nodes. */
    private function buildNodes(array $index) {
        $nodes = [];
        foreach ($index as $name => $descendants) {
            // Numeric-looking segments come back as integer keys; restore the string.
            $node = $this->makeNode((string) $name);
            if (!empty($descendants)) {
                $node->children = $this->buildNodes($descendants);
            }
            $nodes[] = $node;
        }
        return $nodes;
    }
}
// Prep variables
// The sitemap location comes from the first CLI argument or, when served over
// HTTP, from the ?file= query parameter.
if (!empty($argv[1])) {
    $file = $argv[1];
} elseif (!empty($_GET['file']) && is_string($_GET['file'])) {
    // NOTE(review): this value is untrusted and is handed straight to the XML
    // loader, so a web client can make the server read arbitrary local files
    // or remote URLs. Restrict it to a known directory or whitelist before
    // exposing this script publicly.
    $file = $_GET['file'];
} else {
    // Without this check a missing parameter only produced an
    // "undefined index" notice followed by a confusing parse failure.
    http_response_code(400);
    die("Error: No sitemap file specified");
}
$sitemap = new SitemapProcessor($file);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The sitemap needs to be processed into a specific format so it can be used in a D3 tidy tree.
Specific JSON format: https://raw.githubusercontent.com/d3/d3-hierarchy/v1.1.8/test/data/flare.json