Last active
January 18, 2018 13:06
-
-
Save mjlassila/43a6a1c2ff31353d83af0cba82635ce7 to your computer and use it in GitHub Desktop.
Modified Finna/Vufind Refworks export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<? | |
// Guarded declaration: PHP raises a fatal "cannot redeclare" error if a file
// that declares a function is included twice, which can happen to a template
// when it is rendered more than once in a request.
if (!function_exists('removeAccents')) {
    /**
     * Transliterate accented Latin characters to plain ASCII while leaving
     * the letters Ä, Ö, ä and ö untouched.
     *
     * Processing steps:
     *  - the input is composed (NFC) so that a decomposed "a + U+0308" is
     *    treated exactly like a precomposed "ä";
     *  - Ü, ü, ñ and ÿ become UE, ue, ny and yu;
     *  - every remaining combining mark is dropped (é => e, Å => A);
     *  - letters that have no canonical decomposition (ß, Æ, Ø, Ł, Þ, ...)
     *    are mapped to ASCII look-alikes;
     *  - the result is returned in Unicode normalization form KC.
     *
     * The protected letters are isolated by splitting the string on them,
     * not by swapping in placeholder text, so literal "%", "#", "<>" and
     * "><" in the input pass through unchanged.
     *
     * @param string $s UTF-8 encoded text.
     *
     * @return string The transliterated text. The input is returned unchanged
     *                when it is empty, not a scalar, cannot be processed as
     *                UTF-8, or when nothing is left after transliteration.
     */
    function removeAccents($s)
    {
        $original_string = $s;
        if (!is_scalar($s) || (string) $s === '') {
            return $original_string;
        }

        $s = Normalizer::normalize((string) $s, Normalizer::FORM_C);
        if (!is_string($s)) {
            // Normalizer reports failure (e.g. invalid UTF-8) with false.
            return $original_string;
        }

        // Applied before the marks are stripped: these letters would
        // otherwise decompose to a bare U, u, n or y.
        $beforeStripping = array(
            "\xC3\x9C" => 'UE', // U+00DC Ü
            "\xC3\xBC" => 'ue', // U+00FC ü
            "\xC3\xB1" => 'ny', // U+00F1 ñ
            "\xC3\xBF" => 'yu', // U+00FF ÿ
        );

        // Letters that survive decomposition and need an explicit mapping.
        $afterStripping = array(
            "\xC3\x9F" => 'ss', // U+00DF ß
            "\xC3\x86" => 'AE', // U+00C6 Æ
            "\xC3\xA6" => 'ae', // U+00E6 æ
            "\xC4\xB2" => 'IJ', // U+0132 Ĳ
            "\xC4\xB3" => 'ij', // U+0133 ĳ
            "\xC5\x92" => 'OE', // U+0152 Œ
            "\xC5\x93" => 'oe', // U+0153 œ
            "\xC3\x90" => 'D',  // U+00D0 Ð
            "\xC4\x90" => 'D',  // U+0110 Đ
            "\xC3\xB0" => 'd',  // U+00F0 ð
            "\xC4\x91" => 'd',  // U+0111 đ
            "\xC4\xA6" => 'H',  // U+0126 Ħ
            "\xC4\xA7" => 'h',  // U+0127 ħ
            "\xC4\xB1" => 'i',  // U+0131 ı
            "\xC4\xB8" => 'k',  // U+0138 ĸ
            "\xC4\xBF" => 'L',  // U+013F Ŀ
            "\xC5\x81" => 'L',  // U+0141 Ł
            "\xC5\x80" => 'l',  // U+0140 ŀ
            "\xC5\x82" => 'l',  // U+0142 ł
            "\xC5\x8A" => 'N',  // U+014A Ŋ
            "\xC5\x89" => 'n',  // U+0149 ŉ
            "\xC5\x8B" => 'n',  // U+014B ŋ
            "\xC3\x98" => 'O',  // U+00D8 Ø
            "\xC3\xB8" => 'o',  // U+00F8 ø
            "\xC5\xBF" => 's',  // U+017F ſ
            "\xC3\x9E" => 'T',  // U+00DE Þ
            "\xC5\xA6" => 'T',  // U+0166 Ŧ
            "\xC3\xBE" => 't',  // U+00FE þ
            "\xC5\xA7" => 't',  // U+0167 ŧ
        );

        // Split on Ä, Ö, ä and ö while keeping them in the result: with
        // PREG_SPLIT_DELIM_CAPTURE the protected letters sit at the odd
        // indexes and the text between them at the even indexes.
        $pieces = preg_split(
            '@([\x{00c4}\x{00d6}\x{00e4}\x{00f6}])@u',
            $s,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );
        if (!is_array($pieces)) {
            return $original_string;
        }

        foreach ($pieces as $index => $piece) {
            if ($index % 2 === 1 || $piece === '') {
                continue;
            }
            $piece = strtr($piece, $beforeStripping);
            // Decompose so every diacritic becomes a separate combining mark...
            $piece = Normalizer::normalize($piece, Normalizer::FORM_D);
            if (!is_string($piece)) {
                return $original_string;
            }
            // ...and then drop all combining marks.
            $piece = preg_replace('@\pM@u', '', $piece);
            if (!is_string($piece)) {
                return $original_string;
            }
            $pieces[$index] = strtr($piece, $afterStripping);
        }

        $s = Normalizer::normalize(implode('', $pieces), Normalizer::FORM_KC);

        return (is_string($s) && $s !== '') ? $s : $original_string;
    }
}
// Render the export-refworks-format partial before any record fields.
echo $this->partial('RecordDriver/SolrDefault/export-refworks-format.phtml');

// Format overrides passed in by a driver-specific template take precedence
// over whatever the driver itself reports.
if (isset($this->overrideFormats)) {
    $formats = $this->overrideFormats;
} else {
    $formats = $this->driver->tryMethod('getFormats');
}

// The chosen format string is split on "/": segment 1 is used as the main
// format and segment 2 as the sub-format. The second list entry is preferred
// over the first whenever it is non-empty.
$format = '';
$subformat = '';
if (is_array($formats) && !empty($formats[0])) {
    $formatPath = empty($formats[1]) ? $formats[0] : $formats[1];
    $parts = explode('/', $formatPath);
    $format = isset($parts[1]) ? $parts[1] : $format;
    $subformat = isset($parts[2]) ? $parts[2] : $subformat;
}

$refworksFormat = 'Generic';
$author = $this->driver->tryMethod('getPrimaryAuthor');
$series = $this->driver->tryMethod('getSeries');

// Main formats that translate to a RefWorks type without further conditions.
$directFormats = array(
    'WorkOfArt' => 'Artwork',
    'Sound' => 'Sound Recording',
    'Video' => 'Video/ DVD',
    'Map' => 'Map',
);

if (isset($directFormats[$format])) {
    $refworksFormat = $directFormats[$format];
} elseif ($format === 'Book') {
    if ($subformat === 'BookSection') {
        $refworksFormat = 'Book, Section';
    } elseif (empty($author)) {
        // A book without a primary author is exported as an edited book.
        $refworksFormat = 'Book, Edited';
    } else {
        $refworksFormat = 'Book, Whole';
    }
} elseif ($format === 'Other') {
    if ($subformat === 'Software') {
        $refworksFormat = 'Computer Program';
    } elseif ($subformat === 'ConferenceProceeding') {
        $refworksFormat = 'Conference Proceedings';
    }
} elseif ($format === 'Journal') {
    if (in_array($subformat, array('eArticle', 'eSerial', 'eJournal'), true)) {
        $refworksFormat = 'Journal, Electronic';
    } elseif ($subformat === 'Newspaper') {
        $refworksFormat = 'Newspaper Article';
    } else {
        $refworksFormat = 'Journal Article';
    }
} elseif ($format === 'Database') {
    if ($subformat === 'ResearchReport') {
        $refworksFormat = 'Report';
    }
} elseif ($format === 'Thesis') {
    // A thesis that belongs to a series is exported as a whole book.
    $refworksFormat = empty($series) ? 'Dissertation/Thesis' : 'Book, Whole';
}
// Trim trailing spaces and slashes from the title, then transliterate it.
// The character list belongs to rtrim(); it was previously handed to
// removeAccents() by mistake, so the trailing " /" was never removed.
$title = removeAccents(rtrim($this->driver->getTitle(), " /"));
echo "T1 $title\n";
// Series information. Only entries whose key is below 1 are written, which
// for a plain list means just the first series title.
$series = $this->driver->tryMethod('getSeries');
if (is_array($series)) {
    foreach ($series as $position => $entry) {
        if (!($position < 1)) {
            continue;
        }
        if (!is_array($entry)) {
            // Plain string entry: the series title only.
            echo "T2 $entry\n";
            continue;
        }
        // Structured entry: title plus an optional number.
        echo 'T2 ' . $entry['name'] . "\n";
        if (!empty($entry['number'])) {
            echo 'VO ' . $entry['number'] . "\n";
        }
    }
}
// Container (host item) details are only exported when a container title exists.
$containerTitle = $this->driver->tryMethod('getContainerTitle');
if (!empty($containerTitle)) {
    // A record whose last format is a book section gets the container title
    // in T2; every other record gets it in JF.
    $formats = $this->driver->getFormats();
    $titleTag = (end($formats) == '1/Book/BookSection/') ? 'T2' : 'JF';
    echo "$titleTag $containerTitle\n";

    // Volume and issue, each written only when the driver returns a value.
    $optionalFields = array(
        'VO' => 'getContainerVolume',
        'IS' => 'getContainerIssue',
    );
    foreach ($optionalFields as $tag => $method) {
        $value = $this->driver->tryMethod($method);
        if (!empty($value)) {
            echo "$tag $value\n";
        }
    }

    // The end page is looked up only when a start page is present.
    $startPage = $this->driver->tryMethod('getContainerStartPage');
    if (!empty($startPage)) {
        echo "SP $startPage\n";
        $endPage = $this->driver->tryMethod('getContainerEndPage');
        if (!empty($endPage)) {
            echo "OP $endPage\n";
        }
    }
}
// TODO: check whether some records need the page count (OP) exported a
// second time, taken from the extent. Disabled for now:
// if ($extents = $this->driver->tryMethod('getExtent')) { | |
// foreach (array_unique($extents) as $extent) { | |
// echo "OP $extent\n"; | |
// } | |
//} | |
// Prefer the search form of the primary author; fall back to the plain
// primary author when the driver returns nothing for it.
$author = $this->driver->tryMethod('getPrimaryAuthorForSearch');
if (empty($author)) {
    $author = $this->driver->tryMethod('getPrimaryAuthor');
}

// Finnish role designations that are blanked out of author strings
// (matched case-insensitively by str_ireplace).
$roleTerms = array('kirjoittaja', 'toimittaja', 'kääntäjä', 'säveltäjä');

// Builds one complete A1 line: role terms replaced by a space, then transliterated.
$formatAuthor = function ($name) use ($roleTerms) {
    return "A1 " . removeAccents(str_ireplace($roleTerms, ' ', $name)) . "\n";
};

// The primary author is left out for edited books.
if (!empty($author) && $refworksFormat != 'Book, Edited') {
    echo $formatAuthor($author);
}

// Secondary authors are exported with the same A1 tag.
$secondaryAuthors = $this->driver->tryMethod('getSecondaryAuthors', [true]);
if (is_array($secondaryAuthors)) {
    foreach ($secondaryAuthors as $secondaryAuthor) {
        echo $formatAuthor($secondaryAuthor);
    }
}
// One LA line per distinct language reported by the driver.
$languages = $this->driver->tryMethod('getLanguages');
$distinctLanguages = is_array($languages) ? array_unique($languages) : array();
foreach ($distinctLanguages as $language) {
    echo "LA $language\n";
}
// Publication details: the i-th place, publisher and date are exported together.
$pubPlaces = $this->driver->tryMethod('getPlacesOfPublication');
$pubDates = $this->driver->tryMethod('getPublicationDates');
$pubNames = $this->driver->tryMethod('getPublishers');
if (is_array($pubPlaces) && is_array($pubDates) && is_array($pubNames)) {
    // Walk as far as the longest list so that no place, publisher or year is
    // dropped. The lists may differ in length, so every access is guarded.
    $total = max(count($pubPlaces), count($pubDates), count($pubNames));
    for ($i = 0; $i < $total; $i++) {
        if (isset($pubPlaces[$i])) {
            // Remove square brackets and trailing ":", "," and spaces.
            echo "PP " . removeAccents(rtrim(str_replace(array('[', ']'), '', $pubPlaces[$i]), ':, ')) . "\n";
        }
        if (isset($pubNames[$i])) {
            // Cut any ": ..." segment containing the word "jakaja"
            // (Finnish for distributor) out of the publisher name.
            $pubName = preg_replace('/\s*:[^;:]*\bjakaja\b[^;:]*/', '', $pubNames[$i]);
            if ($pubName) {
                echo "PB " . removeAccents(rtrim($pubName, ", ")) . "\n";
            }
        }
        if (!isset($pubDates[$i])) {
            // Fewer dates than places/publishers: nothing to export for this row.
            continue;
        }
        $date = trim($pubDates[$i], '[]. ');
        if (strlen($date) > 4) {
            // Longer than a bare year: let the dateTime helper extract the year.
            $date = $this->dateTime()->extractYear($date);
        }
        if ($date) {
            echo "YR $date\n";
        }
    }
}
// Edition statement, when the driver has one.
$edition = $this->driver->tryMethod('getEdition');
if (!empty($edition)) {
    echo "ED $edition\n";
}

// URLs attached to the record; when there are none, a single link to the
// record itself is exported instead.
$urls = $this->record($this->driver)->getUrlList();
if (!$urls) {
    $urls = array($this->serverUrl($this->recordLink()->getUrl($this->driver)));
}
foreach ($urls as $url) {
    echo "UL $url\n";
}

// One AB line per summary entry.
$summary = $this->driver->tryMethod('getSummary');
if (is_array($summary)) {
    foreach ($summary as $abstract) {
        echo "AB $abstract\n";
    }
}
// Look for a page count (digits followed by "s" or "p", optionally with a
// period) in the physical descriptions and export the first match only.
$physical = $this->driver->tryMethod('getPhysicalDescriptions');
if (is_array($physical)) {
    foreach ($physical as $description) {
        if (!preg_match('/([0-9]+\s*[sp]\b\.?)/', $description, $found)) {
            continue;
        }
        echo "OP {$found[1]}\n";
        break;
    }
}
// Writes one "<tag> <value>" line per element; anything that is not an
// array is ignored.
$emitList = function ($tag, $values) {
    if (!is_array($values)) {
        return;
    }
    foreach ($values as $value) {
        echo "$tag $value\n";
    }
};

// General notes, followed by the dissertation note when there is one.
$emitList('NO', $this->driver->tryMethod('getGeneralNotes'));
$note = $this->driver->tryMethod('getDissertationNote');
if ($note) {
    echo "NO $note\n";
}

// ISBNs and ISSNs share the SN tag.
$emitList('SN', $this->driver->tryMethod('getISBNs'));
$emitList('SN', $this->driver->tryMethod('getISSNs', [true]));
// Subject headings: one K1 line per heading. A heading given as an array has
// its parts cleaned individually and joined with " : ".
$subjects = $this->driver->tryMethod('getAllSubjectHeadings');
if (is_array($subjects)) {
    foreach ($subjects as $heading) {
        if (is_array($heading)) {
            $cleanedParts = array();
            foreach ($heading as $partKey => $part) {
                // Transliterate first, then trim trailing periods and spaces.
                $cleanedParts[$partKey] = rtrim(removeAccents($part), '. ');
            }
            $headingText = implode(' : ', $cleanedParts);
        } else {
            // Trim trailing periods and spaces first, then transliterate.
            $headingText = removeAccents(rtrim($heading, '. '));
        }
        // NOTE(review): the text has already been through removeAccents()
        // above; this second pass looks redundant — confirm before removing.
        echo 'K1 ' . removeAccents($headingText) . "\n";
    }
}
// DOI, when the driver can supply a cleaned one.
$doi = $this->driver->tryMethod('getCleanDOI');
if ($doi) {
    echo "DO $doi\n";
}

// An empty line ends the record.
echo "\n";
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment