Created
April 13, 2015 19:18
-
-
Save juriad/3287910cabc1d947c283 to your computer and use it in GitHub Desktop.
Loader of any hierarchical data from CSV to database; created for http://djpw.cz/162555
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
function buildIndex($header) { | |
$index = array(); | |
$i = 0; | |
foreach ($header as $field) { | |
$index[strtolower(trim($field))] = $i; | |
$i++; | |
} | |
return $index; | |
} | |
function prepareStatement($level, $db) { | |
$q = 'INSERT INTO `' . $level['name'] . '` ('; | |
$cols = array(); | |
$qmarks = array(); | |
foreach ($level['cols'] as $field) { | |
if (is_array($field)) { # ignore | |
continue; | |
} else if ($field[0] == '_') { # ignore | |
continue; | |
} else if ($field[0] == '^') { # parent id | |
$cols[] = '`' . substr($field, 1) . '`'; | |
$qmarks[] = '?'; | |
} else { # arbitrary | |
if($field[0] == '!') { # ignore ! | |
$field = substr($field, 1); | |
} | |
$ex = explode('=', $field); | |
$cols[] = '`' . $ex[0] . '`'; | |
$qmarks[] = '?'; | |
} | |
} | |
$q .= implode(', ', $cols) . ') VALUES (' . implode(',', $qmarks) . ')'; | |
echo $q; | |
echo "\n"; | |
return $db->prepare($q); | |
} | |
function getLevels($structure, $db) { | |
$levels = array(); | |
$level = $structure; | |
$level['stmt'] = prepareStatement($level, $db); | |
$levels[0] = $level; | |
while (true) { | |
$newlevel = NULL; | |
foreach($level['cols'] as $col) { | |
if (is_array($col)) { | |
$newlevel = $col; | |
break; | |
} | |
} | |
if ($newlevel == NULL) { | |
break; | |
} | |
$level = $newlevel; | |
$level['stmt'] = prepareStatement($level, $db); | |
$levels[] = $level; | |
} | |
return $levels; | |
} | |
function findFields($index, $row, $structure, $prefix, $l) { | |
$fields = array(); | |
foreach ($structure['cols'] as $field) { | |
if (is_array($field)) { # ignore | |
continue; | |
} else if ($field[0] == '_') { # ignore | |
continue; | |
} else if ($field[0] == '^') { # parent id | |
$fields[substr($field, 1)] = $prefix[$l - 1]['id']; | |
} else { # arbitrary | |
if($field[0] == '!') { # ignore ! | |
$field = substr($field, 1); | |
} | |
$ex = explode('=', strtolower($field)); | |
$field = $ex[0]; | |
$cfield = count($ex) == 2 ? $ex[1] : $ex[0]; | |
$fields[$field] = trim($row[$index[$cfield]]); | |
} | |
} | |
return $fields; | |
} | |
function arrayEquals($a1, $a2, $level) { | |
foreach ($level['cols'] as $field) { | |
if (is_array($field)) { # ignore | |
continue; | |
} else if ($field[0] == '_' || $field[0] == '!') { # ignore | |
continue; | |
} else { # with value | |
if ($field[0] == '^') { # parent id | |
$field = substr($field, 1); | |
} | |
$ex = explode('=', strtolower($field)); | |
$field = $ex[0]; | |
if ($a1[$field] != $a2[$field]) { | |
return FALSE; | |
} | |
} | |
} | |
return TRUE; | |
} | |
function insert($level, $fields, $db) { | |
$stmt = $level['stmt']; | |
# magic | |
$args = array($level['types']); | |
foreach ($fields as &$field) { | |
$args[] = & $field; | |
} | |
call_user_func_array(array($stmt, "bind_param"), $args); | |
#end magic | |
$stmt->execute(); | |
return $db->insert_id; | |
} | |
function importCSV($path, $delimiter, $structure, $db) { | |
$levels = getLevels($structure, $db); | |
$f = fopen($path, 'r'); | |
$header = fgetcsv($f, 0, $delimiter); | |
$index = buildIndex($header); | |
$prefix = array(); | |
$line = 0; | |
while ($row = fgetcsv($f, 0, $delimiter)) { | |
$l = 0; | |
foreach ($levels as $level) { | |
$fields = findFields($index, $row, $level, $prefix, $l); | |
if (!isset($prefix[$l]) || !arrayEquals($fields, $prefix[$l]['fields'], $level)) { | |
# insert this level | |
$prefix = array_slice($prefix, 0, $l); | |
$prefix[$l]['fields'] = $fields; | |
$prefix[$l]['id'] = insert($level, $fields, $db); | |
echo 'inserted ' . $level['name'] . "\n"; | |
} | |
$l++; | |
} | |
$line++; | |
echo $line . " rows processed\n"; | |
} | |
fclose($f); | |
} | |
$db = new mysqli('localhost', 'root', 'root', 'test'); | |
importCSV('songs.csv', '*', | |
array( | |
'name' => 'interprets', | |
'cols' => array('_id', 'name=contentgroup', array( | |
'name' => 'albums', | |
'cols' => array('_id', '^interpret', 'name=album', 'year', 'albumartist', 'publisher', '!web', '!password=heslo', array( | |
'name' => 'songs', | |
'cols' => array('_id', '^album', 'name=title', 'track', 'artist', 'composer', 'length', 'lyrics=text'), | |
'types' => 'isissii' | |
)), | |
'types' => 'isissss' | |
)), | |
'types' => 's' | |
), $db); | |
# columns starting with _ are ignored (reserved for ID) | |
# columns starting with ^ are reference to parent's ID | |
# columns starting with ! are explicitly removed from row comparison | |
# column may specify mapping between field in CSV and column in DB in form of equation |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment