Created
October 16, 2013 17:57
-
-
Save roblivian/7012077 to your computer and use it in GitHub Desktop.
bad first attempts at getting MARC records out of sierra's postgres tables.....and no, we shouldn't have to do this. iii--
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// first pass at generating a MARC record from III's postgres tables | |
// very ugly | |
$start = time(); | |
header("Content-type: text/plain; charset=utf8"); | |
require_once("File/MARC.php"); | |
$host = ""; | |
$port = "1032"; | |
$dbname = "iii"; | |
$user = ""; | |
$password = ""; | |
try { | |
$dbh = new PDO('pgsql:host='. $host .';port='. $port .';dbname='.$dbname | |
.';user='. $user .';password='. $password); | |
$dbh->setAttribute(PDO::ATTR_EMULATE_PREPARES, true); | |
$dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); | |
} catch (PDOException $e) { | |
print "Error: ". $e->getMessage() ."\n"; | |
die(); | |
} | |
// man, do we have a ton of invalid marc fields. this one is just a test | |
$ignore_marc_fields = array ( | |
'002', | |
); | |
$marc21_file = fopen("/tmp/blah.out", "wb"); | |
// get updated bib_nos | |
$_updated_bibs_stmt = $dbh->prepare("SELECT m.record_num, m.creation_date_gmt, | |
m.deletion_date_gmt, m.campus_code, m.record_last_updated_gmt | |
FROM sierra_view.record_metadata m | |
LEFT JOIN sierra_view.bib_view b ON m.record_num = b.record_num | |
WHERE m.record_type_code='b' | |
AND m.record_last_updated_gmt >= '2013-10-10' | |
AND m.deletion_date_gmt IS NULL | |
AND m.campus_code = '' | |
AND b.bcode3 != 's' | |
"); | |
// get bib/marc info | |
$_subfield_stmt = $dbh->prepare("SELECT * FROM sierra_view.bib_view b | |
LEFT JOIN sierra_view.subfield m ON m.record_id = b.id | |
WHERE b.record_num = ? | |
ORDER BY marc_tag ASC, occ_num ASC, display_order ASC, tag ASC"); | |
// get control fields | |
// get 008 | |
$_008_stmt = $dbh->prepare("SELECT * FROM sierra_view.control_field c | |
JOIN sierra_view.bib_view b on b.id = c.record_id | |
WHERE b.record_num = ? | |
AND c.control_num = 8 | |
LIMIT 1"); | |
// get 007 | |
$_007_stmt = $dbh->prepare("SELECT * FROM sierra_view.control_field c | |
JOIN sierra_view.bib_view b on b.id = c.record_id | |
WHERE b.record_num = ? | |
AND c.control_num = 7 | |
LIMIT 1"); | |
// get 006 | |
$_006_stmt = $dbh->prepare("SELECT * FROM sierra_view.control_field c | |
JOIN sierra_view.bib_view b on b.id = c.record_id | |
WHERE b.record_num = ? | |
AND c.control_num = 6 | |
LIMIT 1"); | |
// get 907 | |
$_907_stmt = $dbh->prepare("SELECT | |
to_char(record_last_updated_gmt, 'MM-DD-YY') AS last_updated, | |
to_char(creation_date_gmt, 'MM-DD-YY') AS created | |
FROM sierra_view.record_metadata | |
WHERE record_type_code='b' | |
AND record_num=?"); | |
$_updated_bibs_stmt->execute(); | |
$j = 0; | |
while($bib_row = $_updated_bibs_stmt->fetch(PDO::FETCH_ASSOC)) { | |
unset($marc); | |
unset($subfield); | |
unset($field); | |
unset($title); | |
$bib_no = $bib_row['record_num']; | |
print ++$j .". ". $bib_no ." - "; | |
$marc = new File_MARC_Record(); | |
try { | |
$_subfield_stmt->execute(array($bib_no)); | |
} catch (PDOException $e) { | |
print "Error: ". print_r($e, 1) ."\n"; | |
print "stmt: ". print_r($_subfield_stmt, 1) ."\n"; | |
die(); | |
} | |
// count row (used to detect first marc_tag, and if we even found a bib at all) | |
$i = 0; | |
// detect when we've changed tags/fields | |
$prev_occ_num = ''; | |
$prev_marc_tag = ''; | |
while($row = $_subfield_stmt->fetch(PDO::FETCH_ASSOC)) { | |
//print_r($row, 1); | |
$row['marc_tag'] = trim($row['marc_tag']); | |
if ($row['marc_tag'] != '' | |
&& !in_array($row['marc_tag'], $ignore_marc_fields)) { | |
// same tag and same field | |
if (($row['marc_tag'] == $prev_marc_tag) | |
&& ($row['occ_num'] == $prev_occ_num)) { | |
#print "add_subfield\n"; | |
$subfield = new File_MARC_Subfield($row['tag'], $row['content']); | |
$field->appendSubfield($subfield); | |
#print $bib_no ."\n"; | |
#print "\n"; | |
// same tag, but different field | |
} elseif (($row['marc_tag'] == $prev_marc_tag) | |
&& ($row['occ_num'] != $prev_occ_num)) { | |
#print "add_prev_field\n"; | |
$marc->appendField($field); | |
#print "new_field\n"; | |
if (substr($row['marc_tag'], 0, 2) == '00' && $row['content'] != '') { | |
$field = new File_MARC_Control_Field($row['marc_tag'], $row['content']); | |
} else { | |
$field = new File_MARC_Data_Field($row['marc_tag']); | |
#print "add_subfield\n"; | |
$subfield = new File_MARC_Subfield($row['tag'], $row['content']); | |
$field->appendSubfield($subfield); | |
} | |
#print "add_indicators\n"; | |
if (isset($row['marc_ind1'])) { | |
$field->setIndicator(1, $row['marc_ind1']); | |
} else { | |
$field->setIndicator(1, " "); | |
} | |
if (isset($row['marc_ind2'])) { | |
$field->setIndicator(2, $row['marc_ind2']); | |
} else { | |
$field->setIndicator(2, " "); | |
} | |
#print "\n"; | |
// new tag | |
} elseif ($row['marc_tag'] != $prev_marc_tag) { | |
// we've already had one tag before this | |
if ($i) { | |
#print "add_prev_field\n"; | |
$marc->appendField($field); | |
} | |
// this is a control field (00x) | |
if (substr($row['marc_tag'], 0, 2) == '00') { | |
$field = new File_MARC_Control_Field($row['marc_tag'], $row['content']); | |
// this is a data field | |
} elseif ($row['marc_tag']) { | |
$field = new File_MARC_Data_Field($row['marc_tag']); | |
#print "add_subfield\n"; | |
$subfield = new File_MARC_Subfield($row['tag'], $row['content']); | |
$field->appendSubfield($subfield); | |
#print "add_indicators\n"; | |
if (isset($row['marc_ind1'])) { | |
$field->setIndicator(1, $row['marc_ind1']); | |
} else { | |
$field->setIndicator(1, " "); | |
} | |
if (isset($row['marc_ind2'])) { | |
$field->setIndicator(2, $row['marc_ind2']); | |
} else { | |
$field->setIndicator(2, " "); | |
} | |
} | |
#print "\n"; | |
$i++; | |
} | |
} else { | |
print "\nERROR : ". $bib_no . " marc_tag == ". $row['marc_tag'] ." - ". | |
$row['content'] ."\n"; | |
$bib_errors[] = $bib_no; | |
} | |
$prev_occ_num = $row['occ_num']; | |
$prev_marc_tag = $row['marc_tag']; | |
$last_row = $row; | |
} | |
// add last field | |
#print "add_prev_field\n"; | |
if ($field) { | |
$marc->appendField($field); | |
} | |
$title_field = $marc->getField('245'); | |
if ($title_field) { | |
$title = $title_field->getSubfield('a'); | |
} else { | |
$title = $last_row['title']; | |
$_245_field = new File_MARC_Data_Field('245', | |
array( | |
new File_MARC_Subfield('a', $title), | |
)); | |
$marc->appendField($_245_field); | |
} | |
// no bibs/fields were found, so error and exit | |
if (!$marc) { | |
print "ERROR - no bibs/fields found for : ". $bib_no ." - ". | |
$title ."\n"; | |
$bib_errors[] = $bib_no; | |
} else { | |
print "MARC created for : ". $bib_no ." - ". $title ."\n"; | |
} | |
$_008_stmt->execute(array($bib_no)); | |
$_008_row = $_008_stmt->fetch(PDO::FETCH_ASSOC); | |
$_008 = ""; | |
for ($i = 0; $i <= 39; $i++) { | |
$num = 'p'. str_pad($i, 2, 0, STR_PAD_LEFT); | |
$_008 .= $_008_row[$num]; | |
} | |
$marc->prependField(new File_MARC_Control_Field('008', $_008)); | |
$_008_stmt ->closeCursor(); | |
$_007_stmt->execute(array($bib_no)); | |
$_007_row = $_007_stmt->fetch(PDO::FETCH_ASSOC); | |
$_007 = ""; | |
for ($i = 0; $i <= 39; $i++) { | |
$num = 'p'. str_pad($i, 2, 0, STR_PAD_LEFT); | |
$_007 .= $_007_row[$num]; | |
} | |
$_007_stmt ->closeCursor(); | |
$marc->prependField(new File_MARC_Control_Field('007', $_007)); | |
$_006_stmt->execute(array($bib_no)); | |
$_006_row = $_006_stmt->fetch(PDO::FETCH_ASSOC); | |
$_006 = ""; | |
for ($i = 0; $i <= 39; $i++) { | |
$num = 'p'. str_pad($i, 2, 0, STR_PAD_LEFT); | |
$_006 .= $_006_row[$num]; | |
} | |
$marc->prependField(new File_MARC_Control_Field('006', $_006)); | |
$_006_stmt ->closeCursor(); | |
// calculate checkdigit for 907 output | |
$bib_no_with_checkdigit = $bib_no . shrew_calc_checkdigit($bib_no); | |
$_907_stmt->execute(array($bib_no)); | |
$_907_row = $_907_stmt->fetch(PDO::FETCH_ASSOC); | |
if (isset($_907_row['last_updated'])) { | |
$_907_field = new File_MARC_Data_Field('907', | |
array( | |
new File_MARC_Subfield('a', '.b'. $bib_no_with_checkdigit), | |
new File_MARC_Subfield('b', $_907_row['last_updated']), | |
new File_MARC_Subfield('c', $_907_row['created']), | |
)); | |
$marc->appendField($_907_field); | |
} | |
$_907_stmt ->closeCursor(); | |
// TODO | |
// need to add 945 fields. not sure where all that info is pulled from yet | |
fwrite($marc21_file, $marc->toRaw()); | |
#print $marc; | |
} | |
fclose($marc21_file); | |
$end = time(); | |
$delta = $end - $start; | |
print "total_rows: ". $j ."\n"; | |
print "total_errors: ". count($bib_errors) ."\n"; | |
//print "errors: ". print_r($bib_errors, 1); | |
print "\n\ndelta: ". $delta; | |
// thanks to mark matienzo (anarchivist) | |
// https://github.com/anarchivist/drupal-shrew/ | |
function shrew_calc_checkdigit($recnum) { | |
$seq = array_reverse(str_split($recnum)); | |
$sum = 0; | |
$multiplier = 2; | |
foreach ($seq as $digit) { | |
$digit *= $multiplier; | |
$sum += $digit; | |
$multiplier++; | |
} | |
$check = $sum % 11; | |
if ($check == 10) { | |
return 'x'; | |
} | |
else { | |
return strval($check); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment