Skip to content

Instantly share code, notes, and snippets.

@Hubro
Created March 27, 2012 14:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hubro/2216437 to your computer and use it in GitHub Desktop.
Save Hubro/2216437 to your computer and use it in GitHub Desktop.
Function for converting an XML report from Yahoo Web Analytics into JSON format.
<?php
/**
 * Convert an XML report into a pretty-printed JSON tree.
 *
 * The report is read with these XPath locations:
 *   - column names: /GetReportResponse/TopHeadRows/Row/Item/Name
 *   - row labels:   /GetReportResponse/LeftHeadItems/Item (child nodes Name and Indent)
 *   - row data:     /GetReportResponse/DataRows/Row/Cell/FormattedValue
 *
 * Every data row becomes an associative array with the keys "id" (1-based
 * row number), "Category" (name of the left head item at the same position),
 * "children" (list of nested rows) and one key per column name. A row whose
 * Indent is greater than zero is appended to the "children" of the closest
 * preceding row with a smaller indent; rows without such an ancestor are
 * emitted at the top level. Missing nodes yield empty strings (indent 0).
 *
 * @param string $path Path to the XML report file.
 * @return string|false Formatted JSON, or false when the file does not exist
 *                      or cannot be loaded as XML.
 */
function report_to_json($path)
{
    # Check if the file exists
    if (!file_exists($path)) {
        return false;
    }

    # Load the xml file. DOMDocument::load() is an instance method, so it has
    # to be called on an object (a static call is a fatal error on PHP 8).
    $dom = new DOMDocument();
    if ($dom->load($path) === false) {
        return false;
    }
    $domxpath = new DOMXPath($dom);

    # Trimmed text of the first node matching $expression relative to
    # $context, or '' when the context node or the match is missing.
    $firstText = function ($expression, $context) use ($domxpath) {
        if ($context === null) {
            return '';
        }
        $matches = $domxpath->query($expression, $context);
        if ($matches === false || $matches->length < 1) {
            return '';
        }
        return trim((string) $matches->item(0)->nodeValue);
    };

    # Fetch the DOM lists for the report sections
    $headersDom   = $domxpath->query('/GetReportResponse/TopHeadRows/Row/Item/Name');
    $leftItemsDom = $domxpath->query('/GetReportResponse/LeftHeadItems/Item');
    $dataRowsDom  = $domxpath->query('/GetReportResponse/DataRows/Row');

    # Fixed headers first, followed by the column names found in the report
    $headers = array("id", "Category", "children");
    $extraHeaders = count($headers);
    foreach ($headersDom as $nameNode) {
        $headers[] = trim((string) $nameNode->nodeValue);
    }
    $headerCount = count($headers);

    # Top-level rows of the resulting tree
    $rows = array();

    # Most recent row per indentation level, held by reference so children
    # can still be appended after the row was stored in the tree.
    $last = array();

    # Loop once per data row
    for ($i = 0; $i < $dataRowsDom->length; $i++) {
        # Break the reference to the previous iteration's row before reuse
        unset($row);

        $dataCells = $domxpath->query('./Cell', $dataRowsDom->item($i));
        $leftItem  = $leftItemsDom->item($i);

        # The indent for this row (missing or negative values count as 0)
        $indent = max(0, intval($firstText('./Indent', $leftItem)));

        # Fixed columns: id, category (taken from the left head item), children
        $row = array(
            "id"       => $i + 1,
            "Category" => $firstText('./Name', $leftItem),
            "children" => array(),
        );

        # The rest of the headers have their values in the data row cells
        for ($h = $extraHeaders; $h < $headerCount; $h++) {
            $cell = ($dataCells === false) ? null : $dataCells->item($h - $extraHeaders);
            $row[$headers[$h]] = $firstText('./FormattedValue', $cell);
        }

        # Find the nearest shallower level that actually holds a row, so a
        # skipped indentation level cannot make the row vanish.
        $parentLevel = $indent - 1;
        while ($parentLevel >= 0 && !isset($last[$parentLevel])) {
            $parentLevel--;
        }

        # Add this row to the top-level rows or as a child of its parent
        if ($parentLevel < 0) {
            $rows[] = &$row;
        } else {
            $last[$parentLevel]["children"][] = &$row;
        }

        # Deeper levels belong to an earlier branch and must not be reused
        foreach (array_keys($last) as $level) {
            if ($level > $indent) {
                unset($last[$level]);
            }
        }
        $last[$indent] = &$row;
    }
    unset($row);

    return prettify(json_encode($rows));
}
/**
 * Re-format a compact JSON string with one element per line.
 *
 * A line break is inserted after every "{", "[" and "," and before every
 * "}" and "]" that occurs outside of a string literal; each nesting level is
 * indented by one repetition of the indent string. Characters inside string
 * literals are copied unchanged.
 *
 * @param string $json Compact JSON text, e.g. the output of json_encode().
 * @return string The formatted JSON text ('' for empty input).
 */
function prettify($json)
{
    $json = (string) $json;

    $result    = '';
    $depth     = 0;
    $strLen    = strlen($json);
    $indentStr = ' ';
    $newLine   = "\n";
    $inString  = false;
    # True when the previous character inside a string was an unescaped
    # backslash. Tracking this state (instead of only looking at the previous
    # character) keeps a literal ending in an escaped backslash, such as
    # "b\\", from being mistaken for an unterminated string.
    $escaped   = false;

    for ($i = 0; $i < $strLen; $i++) {
        $char = $json[$i];

        # Inside a string literal: copy verbatim and watch for the closing quote
        if ($inString) {
            $result .= $char;
            if ($escaped) {
                $escaped = false;
            } elseif ($char === '\\') {
                $escaped = true;
            } elseif ($char === '"') {
                $inString = false;
            }
            continue;
        }

        # Opening quote of a string literal
        if ($char === '"') {
            $inString = true;
            $result .= $char;
            continue;
        }

        # End of an element: break the line and step the indentation back
        if ($char === '}' || $char === ']') {
            $depth--;
            $result .= $newLine . str_repeat($indentStr, max(0, $depth));
        }

        # Add the character to the result string
        $result .= $char;

        # Start of an element or next member: break the line and indent
        if ($char === ',' || $char === '{' || $char === '[') {
            if ($char !== ',') {
                $depth++;
            }
            $result .= $newLine . str_repeat($indentStr, max(0, $depth));
        }
    }

    return $result;
}
# Debug entry point: unless the ASTRUPS constant has been defined, emit the
# bundled example report as a JSON response.
if (defined('ASTRUPS') === false) {
    header('Content-type: application/json');
    print report_to_json('data/example-report.xml');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment