Skip to content

Instantly share code, notes, and snippets.

@nicholasdunbar
Created August 23, 2014 06:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicholasdunbar/af73c436fbb0810fd95e to your computer and use it in GitHub Desktop.
Save nicholasdunbar/af73c436fbb0810fd95e to your computer and use it in GitHub Desktop.
Parse columns from pure text into structured data based on the precise spacing of the header and the columns.
<?php
// Usage example: parse fixed-width text columns such as the table below.
// Columns are located purely by character offset, so the header string given
// to parse_heading() must keep exactly the spacing used by the data lines.
/*
col1         col2    col3
====         ====    ====
1 a b c d e  103 14  as d9
2 a          103 14  as d9
3 a          103 14  as d9
*/
$headings = array('col1', 'col2', 'col3');
$header = "col1         col2    col3";
// Locate every column. Both returned arrays are keyed by column name:
// $start_positions[name] is the column's offset, $lengths[name] its width.
list($start_positions, $lengths) = parse_heading($headings, $header);
// Slice one data line into a row keyed by column name.
$line = '1 a b c d e  103 14  as d9';
$row = parse_line($line, $headings, $start_positions, $lengths);
// $row is an array, so it is indexed with [] (not called with ()).
echo $row['col1'];
//output:
//1 a b c d e
// Repeat the parse_line() call for each remaining data line.
/************* Functions Below **************/
/**
 * parse_heading
 *
 * Find the start position and width of each column name in a header line.
 * Column names that do not occur in $line are removed from $heading.
 *
 * @param array $heading
 *   List of column names to look for in $line. Passed by reference: on return
 *   it holds only the names that were found (original keys are preserved, so
 *   the array may have gaps). NULL and empty-string entries are dropped
 *   silently.
 *   Ex: array("Logical Drive Name", "LUN", "Controller", "Accessible by", "Logical Drive status");
 *
 * @param string $line
 *   The header line for the columns, with its original spacing intact.
 *   Ex: "Logical Drive Name  LUN  Controller  Accessible by  Logical Drive status"
 *
 * @param boolean $is_logging
 *   When TRUE (the default) an "Alert: ..." line is echoed for every column
 *   name that is not found; pass FALSE to suppress those messages.
 *
 * @return array
 *   Two-element array of maps, both keyed by column name:
 *   $return_val[0][$column_name] = byte offset in $line where the name starts
 *   $return_val[1][$column_name] = width of the column, i.e. the length of the
 *       name plus the white space that follows it. A column that runs to the
 *       end of $line has width 0, which parse_line() treats as "everything up
 *       to the end of the line", so the rightmost column is not cut off at
 *       the width of its heading.
 *
 * Note: the first occurrence of each name is used, so a name that is also a
 * substring of an earlier heading will be located inside that earlier heading.
 */
function parse_heading(&$heading, $line, $is_logging = TRUE) {
    $start_pos = array();
    $lengthOf = array();
    $found = array();
    $line_length = strlen($line);

    // Iterate by key so that an already-filtered (gapped) array works too.
    foreach ($heading as $key => $column_name) {
        // Entries that are already empty carry no column to look for.
        if ($column_name === NULL || $column_name === '') {
            continue;
        }

        // Quote the name so characters such as ( ) . / are matched literally.
        $pattern = '/' . preg_quote($column_name, '/') . '\s*/';
        $matches = array();
        if (preg_match($pattern, $line, $matches, PREG_OFFSET_CAPTURE)) {
            $start = $matches[0][1];
            $length = strlen($matches[0][0]);

            $start_pos[$column_name] = $start;
            // Width 0 marks the rightmost column: "read to the end of the line".
            $lengthOf[$column_name] = ($start + $length >= $line_length) ? 0 : $length;
            $found[$key] = $column_name;
        } elseif ($is_logging) {
            echo "Alert: Heading '$column_name' was not found.\n";
        }
    }

    // Hand back only the headings that were located, keeping their keys.
    $heading = $found;

    return array($start_pos, $lengthOf);
}
/**
 * parse_line
 *
 * Pull the data out of a line into an array of fields, based on the start
 * position and width of each column.
 *
 * @param string $line
 *   The data line to be processed.
 *
 * @param array $heading
 *   List of column names to extract, e.g. the $heading array after it has
 *   been passed through parse_heading(...).
 *
 * @param array $start_pos
 *   Map of column name => start offset within the line; this is element [0]
 *   of the array returned by parse_heading(...).
 *
 * @param array $lengthOf
 *   Map of column name => column width; this is element [1] of the array
 *   returned by parse_heading(...). A width of 0 (or a missing entry) means
 *   "from the start offset to the end of the line".
 *
 * @return array
 *   Map of column name => trimmed field text. A column with no entry in
 *   $start_pos, or whose start offset lies beyond the end of $line, yields an
 *   empty string. An empty $heading yields an empty array.
 */
function parse_line($line, $heading, $start_pos, $lengthOf) {
    $arr = array();

    foreach ($heading as $field) {
        // Unknown column: nothing to cut out of the line.
        if (!isset($start_pos[$field])) {
            $arr[$field] = "";
            continue;
        }

        if (empty($lengthOf[$field])) {
            // No width recorded: take everything up to the end of the line.
            $value = substr($line, $start_pos[$field]);
        } else {
            $value = substr($line, $start_pos[$field], $lengthOf[$field]);
        }

        // substr() returns FALSE on PHP < 8 when the line is shorter than the
        // start offset; the cast turns that into an empty string.
        $arr[$field] = trim((string) $value);
    }

    return $arr;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment