Skip to content

Instantly share code, notes, and snippets.

@juriad
Created April 13, 2015 19:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save juriad/3287910cabc1d947c283 to your computer and use it in GitHub Desktop.
Save juriad/3287910cabc1d947c283 to your computer and use it in GitHub Desktop.
Loader of any hierarchical data from CSV to database; created for http://djpw.cz/162555
<?php
function buildIndex($header) {
$index = array();
$i = 0;
foreach ($header as $field) {
$index[strtolower(trim($field))] = $i;
$i++;
}
return $index;
}
function prepareStatement($level, $db) {
$q = 'INSERT INTO `' . $level['name'] . '` (';
$cols = array();
$qmarks = array();
foreach ($level['cols'] as $field) {
if (is_array($field)) { # ignore
continue;
} else if ($field[0] == '_') { # ignore
continue;
} else if ($field[0] == '^') { # parent id
$cols[] = '`' . substr($field, 1) . '`';
$qmarks[] = '?';
} else { # arbitrary
if($field[0] == '!') { # ignore !
$field = substr($field, 1);
}
$ex = explode('=', $field);
$cols[] = '`' . $ex[0] . '`';
$qmarks[] = '?';
}
}
$q .= implode(', ', $cols) . ') VALUES (' . implode(',', $qmarks) . ')';
echo $q;
echo "\n";
return $db->prepare($q);
}
function getLevels($structure, $db) {
$levels = array();
$level = $structure;
$level['stmt'] = prepareStatement($level, $db);
$levels[0] = $level;
while (true) {
$newlevel = NULL;
foreach($level['cols'] as $col) {
if (is_array($col)) {
$newlevel = $col;
break;
}
}
if ($newlevel == NULL) {
break;
}
$level = $newlevel;
$level['stmt'] = prepareStatement($level, $db);
$levels[] = $level;
}
return $levels;
}
function findFields($index, $row, $structure, $prefix, $l) {
$fields = array();
foreach ($structure['cols'] as $field) {
if (is_array($field)) { # ignore
continue;
} else if ($field[0] == '_') { # ignore
continue;
} else if ($field[0] == '^') { # parent id
$fields[substr($field, 1)] = $prefix[$l - 1]['id'];
} else { # arbitrary
if($field[0] == '!') { # ignore !
$field = substr($field, 1);
}
$ex = explode('=', strtolower($field));
$field = $ex[0];
$cfield = count($ex) == 2 ? $ex[1] : $ex[0];
$fields[$field] = trim($row[$index[$cfield]]);
}
}
return $fields;
}
function arrayEquals($a1, $a2, $level) {
foreach ($level['cols'] as $field) {
if (is_array($field)) { # ignore
continue;
} else if ($field[0] == '_' || $field[0] == '!') { # ignore
continue;
} else { # with value
if ($field[0] == '^') { # parent id
$field = substr($field, 1);
}
$ex = explode('=', strtolower($field));
$field = $ex[0];
if ($a1[$field] != $a2[$field]) {
return FALSE;
}
}
}
return TRUE;
}
function insert($level, $fields, $db) {
$stmt = $level['stmt'];
# magic
$args = array($level['types']);
foreach ($fields as &$field) {
$args[] = & $field;
}
call_user_func_array(array($stmt, "bind_param"), $args);
#end magic
$stmt->execute();
return $db->insert_id;
}
function importCSV($path, $delimiter, $structure, $db) {
$levels = getLevels($structure, $db);
$f = fopen($path, 'r');
$header = fgetcsv($f, 0, $delimiter);
$index = buildIndex($header);
$prefix = array();
$line = 0;
while ($row = fgetcsv($f, 0, $delimiter)) {
$l = 0;
foreach ($levels as $level) {
$fields = findFields($index, $row, $level, $prefix, $l);
if (!isset($prefix[$l]) || !arrayEquals($fields, $prefix[$l]['fields'], $level)) {
# insert this level
$prefix = array_slice($prefix, 0, $l);
$prefix[$l]['fields'] = $fields;
$prefix[$l]['id'] = insert($level, $fields, $db);
echo 'inserted ' . $level['name'] . "\n";
}
$l++;
}
$line++;
echo $line . " rows processed\n";
}
fclose($f);
}
$db = new mysqli('localhost', 'root', 'root', 'test');
importCSV('songs.csv', '*',
array(
'name' => 'interprets',
'cols' => array('_id', 'name=contentgroup', array(
'name' => 'albums',
'cols' => array('_id', '^interpret', 'name=album', 'year', 'albumartist', 'publisher', '!web', '!password=heslo', array(
'name' => 'songs',
'cols' => array('_id', '^album', 'name=title', 'track', 'artist', 'composer', 'length', 'lyrics=text'),
'types' => 'isissii'
)),
'types' => 'isissss'
)),
'types' => 's'
), $db);
# columns starting with _ are ignored (reserved for ID)
# columns starting with ^ are reference to parent's ID
# columns starting with ! are explicitly removed from row comparison
# column may specify mapping between field in CSV and column in DB in form of equation
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment