Skip to content

Instantly share code, notes, and snippets.

@kyletaylored
Last active May 14, 2019 21:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kyletaylored/60ac3b6aacf298d4f23ced1331b351e2 to your computer and use it in GitHub Desktop.
Save kyletaylored/60ac3b6aacf298d4f23ced1331b351e2 to your computer and use it in GitHub Desktop.
Just a scratch pad for now.
<?php
// On the command line the sitemap path is a required first argument.
// Print usage to STDERR and exit non-zero so shell callers can detect the failure.
// (The previous version called dd(), which is not defined in plain PHP.)
if (PHP_SAPI === 'cli' && empty($argv[1])) {
    fwrite(STDERR, "Pass a file name as an argument. \n\n ./index.php sitemap.xml\n");
    exit(1);
}
//*********** Sitemap Processor Class ***********
/**
 * Converts a sitemap XML file into a nested name/children tree and writes it
 * to "tree-xml.json" in the current working directory.
 *
 * Every node is a stdClass with a `name` property and, when it has
 * descendants, a `children` list of further nodes.
 */
class SitemapProcessor
{
    /** Name given to the synthetic root node that wraps every host in the sitemap. */
    const ROOT_NAME = 'sitemap';

    /** @var string[] Normalised URLs extracted from the sitemap. */
    private $paths;

    /** @var mixed Currently unused. */
    private $json;

    /** @var SimpleXMLElement Parsed sitemap document. */
    private $xml;

    /** @var stdClass Root node of the generated tree. */
    private $processed;

    /**
     * Loads the sitemap, builds the tree and writes it to disk.
     *
     * @param string $file Path to the sitemap XML file.
     * @throws RuntimeException If the file cannot be parsed or the JSON cannot be written.
     */
    public function __construct($file)
    {
        $this->loadXML($file);
        $this->paths = $this->cleanUrls($this->xml);
        $this->processed = $this->processUrls($this->paths);
        $this->writeJson($this->processed);
    }

    /**
     * Parses the file into a SimpleXMLElement and stores it on the instance.
     *
     * @param string $file Path to file.
     * @throws RuntimeException If the file cannot be read or is not well-formed XML.
     */
    private function loadXML($file)
    {
        $xml = simplexml_load_file($file);
        // Compare strictly against false: an empty root element is a valid
        // SimpleXMLElement that nevertheless evaluates as falsy.
        if ($xml === false) {
            throw new RuntimeException("Error: Cannot create object");
        }
        $this->xml = $xml;
    }

    /**
     * Writes the given value as JSON to "tree-xml.json".
     *
     * @param mixed $obj Value to serialise.
     * @throws RuntimeException If encoding or writing fails.
     */
    private function writeJson($obj)
    {
        $json = $this->encodeJson($obj);
        if (file_put_contents("tree-xml.json", $json) === false) {
            throw new RuntimeException('Unable to write tree-xml.json');
        }
    }

    /**
     * Sends the given value to the client as a JSON response.
     *
     * @param mixed $obj Value to serialise.
     * @throws RuntimeException If the value cannot be encoded.
     */
    public function serveJson($obj)
    {
        // Encode first so a failure surfaces before any header is emitted.
        $json = $this->encodeJson($obj);
        header('Content-type: application/json');
        print $json;
    }

    /**
     * Get the length of the longest string in a list.
     *
     * @param array $arr Strings to measure.
     * @return int Length of longest array item, or 0 for an empty list.
     */
    public function getMaxDepth(array $arr)
    {
        if (empty($arr)) {
            // max() rejects an empty array.
            return 0;
        }
        return max(array_map('strlen', $arr));
    }

    /**
     * Cleans list of URLs in sitemap file, returns list as array.
     *
     * Each <loc> value is lower-cased, stripped of surrounding whitespace and
     * trailing slashes, and reduced to scheme://host/path. Entries without a
     * scheme or host are skipped.
     *
     * @param SimpleXMLElement $xml Sitemap XML file.
     * @return array List of cleaned URLs.
     */
    public function cleanUrls(SimpleXMLElement $xml)
    {
        $paths = [];
        foreach ($xml as $entry) {
            $loc = rtrim(strtolower(trim((string) $entry->loc)), '/');
            $url = parse_url($loc);
            // parse_url() returns false for seriously malformed input and omits
            // keys it could not find, so check before indexing.
            if ($url === false || empty($url['scheme']) || empty($url['host'])) {
                continue;
            }
            $path = isset($url['path']) ? $url['path'] : "";
            $paths[] = $url['scheme'] . "://" . $url['host'] . $path;
        }
        return $paths;
    }

    /**
     * Splits URL into its host followed by each non-empty path segment.
     *
     * Repeated segments are kept ("/a/b/a" yields a, b, a) so that the
     * position of every segment still reflects its depth in the URL.
     *
     * @param string $path A URL string.
     * @return array Host and path segments, or an empty array if no host can be parsed.
     */
    public function splitUrl(string $path)
    {
        $parts = parse_url($path);
        if ($parts === false || empty($parts['host'])) {
            return [];
        }
        $url_parts = [$parts['host']];
        $raw_path = isset($parts['path']) ? $parts['path'] : "";
        foreach (explode('/', $raw_path) as $segment) {
            // Leading, trailing and doubled slashes produce empty segments.
            if ($segment !== '') {
                $url_parts[] = $segment;
            }
        }
        return $url_parts;
    }

    /**
     * Creates a node named $part and attaches it to $parent.
     *
     * When $parent is not yet a named node, the new node replaces it;
     * otherwise the new node is appended to $parent's `children` list.
     *
     * @param string $part Name for the new node.
     * @param object|null $parent Node to attach to; updated by reference.
     * @return object The (possibly replaced) parent node.
     */
    public function createChild($part, &$parent)
    {
        $node = new stdClass();
        $node->name = $part;
        // empty() is never true for an object, so test for a missing name instead.
        if (!is_object($parent) || !isset($parent->name)) {
            // I am now the parent.
            $parent = $node;
            return $parent;
        }
        $parent->children = $this->asList(isset($parent->children) ? $parent->children : []);
        $parent->children[] = $node;
        return $parent;
    }

    /**
     * Recursively merges two objects and returns a resulting object.
     *
     * Properties of $obj2 overwrite those of $obj1, with two exceptions:
     * nested objects present on both sides are merged recursively, and
     * `children` lists are combined so that children sharing a `name` are
     * merged into a single node. Neither input is modified.
     *
     * @param object $obj1 The base object
     * @param object $obj2 The merge object
     * @return object The merged object
     */
    public function mergeObjectsRecursively($obj1, $obj2)
    {
        $merged = new stdClass();
        foreach (get_object_vars((object) $obj1) as $key => $value) {
            $merged->$key = $value;
        }
        foreach (get_object_vars((object) $obj2) as $key => $value) {
            if ($key === 'children' && isset($merged->children)) {
                $merged->children = $this->mergeChildren(
                    $this->asList($merged->children),
                    $this->asList($value)
                );
            } elseif (isset($merged->$key) && is_object($merged->$key) && is_object($value)) {
                $merged->$key = $this->mergeObjectsRecursively($merged->$key, $value);
            } else {
                $merged->$key = $value;
            }
        }
        return $merged;
    }

    /**
     * Builds a name/children tree from a list of URLs.
     *
     * The returned root is a synthetic node named self::ROOT_NAME whose
     * children are the distinct hosts; below each host every path segment
     * becomes a node, and URLs sharing a prefix share the same nodes.
     *
     * @param array $paths List of URL strings.
     * @return stdClass Root node of the tree.
     */
    public function processUrls(array $paths)
    {
        // Nested associative index keyed by segment: finding or creating a
        // child is a single key lookup instead of a scan over its siblings.
        $index = [];
        foreach ($paths as $path) {
            $cursor = &$index;
            foreach ($this->splitUrl($path) as $part) {
                if (!isset($cursor[$part])) {
                    $cursor[$part] = [];
                }
                $cursor = &$cursor[$part];
            }
            // Drop the reference so the next URL starts again from the top.
            unset($cursor);
        }

        $root = new stdClass();
        $root->name = self::ROOT_NAME;
        $root->children = $this->buildNodes($index);
        return $root;
    }

    /** Encodes a value as JSON, throwing instead of returning false on failure. */
    private function encodeJson($value)
    {
        $json = json_encode($value);
        if ($json === false) {
            throw new RuntimeException('Unable to encode JSON: ' . json_last_error_msg());
        }
        return $json;
    }

    /** Wraps a single value in a list; arrays are returned unchanged. */
    private function asList($value)
    {
        return is_array($value) ? $value : [$value];
    }

    /** Combines two child lists, merging nodes that share a name and keeping first-seen order. */
    private function mergeChildren(array $base, array $extra)
    {
        $result = [];
        $position = [];
        foreach ($base as $child) {
            if (isset($child->name)) {
                $position[(string) $child->name] = count($result);
            }
            $result[] = $child;
        }
        foreach ($extra as $child) {
            $name = isset($child->name) ? (string) $child->name : null;
            if ($name !== null && isset($position[$name])) {
                $at = $position[$name];
                $result[$at] = $this->mergeObjectsRecursively($result[$at], $child);
                continue;
            }
            if ($name !== null) {
                $position[$name] = count($result);
            }
            $result[] = $child;
        }
        return $result;
    }

    /** Converts the nested segment index built by processUrls() into a list of node objects. */
    private function buildNodes(array $index)
    {
        $nodes = [];
        foreach ($index as $name => $children) {
            $node = new stdClass();
            // PHP turns numeric-looking array keys into integers; restore the string.
            $node->name = (string) $name;
            if (!empty($children)) {
                $node->children = $this->buildNodes($children);
            }
            $nodes[] = $node;
        }
        return $nodes;
    }
}
// Resolve the sitemap location: first CLI argument, otherwise the "file" query parameter.
if (!empty($argv[1])) {
    $file = $argv[1];
} elseif (isset($_GET['file']) && is_string($_GET['file']) && $_GET['file'] !== '') {
    // NOTE(review): this value is supplied by the client and is handed straight to
    // the XML loader, so a caller can point it at any local path or remote URL.
    // Restrict it to an allow-list before exposing this script on a web server.
    $file = $_GET['file'];
} else {
    http_response_code(400);
    exit('Missing "file" parameter.');
}
$sitemap = new SitemapProcessor($file);
@kyletaylored
Copy link
Author

Need to process the sitemap into the specific JSON format required by the D3 Tidy Tree.

Specific JSON format: https://raw.githubusercontent.com/d3/d3-hierarchy/v1.1.8/test/data/flare.json

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment