Skip to content

Instantly share code, notes, and snippets.

@mjlassila
Last active January 18, 2018 13:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mjlassila/43a6a1c2ff31353d83af0cba82635ce7 to your computer and use it in GitHub Desktop.
Save mjlassila/43a6a1c2ff31353d83af0cba82635ce7 to your computer and use it in GitHub Desktop.
Modified Finna/Vufind Refworks export
<?
/**
 * Transliterate a UTF-8 string to a mostly-ASCII form while keeping the
 * Finnish/Swedish letters Ä, Ö, ä and ö intact.
 *
 * Rules, in order:
 *  - Ä, Ö, ä, ö are passed through untouched.
 *  - Ü => UE, ü => ue, ñ => ny, ÿ => yu.
 *  - Every other character is canonically decomposed (NFD) and its
 *    combining marks are dropped (e.g. Ú => U, á => a).
 *  - Ligatures and stroked letters that have no decomposition are mapped to
 *    ASCII look-alikes (ß => ss, Æ => AE, Ø => O, Ł => L, ...).
 *  - The result is returned in NFKC form.
 *
 * The protected letters are isolated by splitting the input around them
 * rather than by swapping in placeholder text, so literal "%", "#", "<>" and
 * "><" in the input survive and adjacent protected letters ("öö") are not
 * corrupted.
 *
 * @param string $s UTF-8 text to transliterate.
 *
 * @return string The transliterated text, or the unmodified input when the
 *                input is not valid UTF-8 or the result would be empty.
 */
function removeAccents( $s)
{
    $original_string = $s;

    // Substitutions that must happen BEFORE decomposition; otherwise the
    // generic mark stripping below would reduce these letters to U/u/n/y.
    $before_decomposition = array(
        '@\x{00dc}@u' => 'UE', // Ü
        '@\x{00fc}@u' => 'ue', // ü
        '@\x{00f1}@u' => 'ny', // ñ
        '@\x{00ff}@u' => 'yu', // ÿ
    );

    // Applied AFTER decomposition: drop combining marks first, then map the
    // letters that NFD leaves untouched.
    $after_decomposition = array(
        '@\pM@u'      => '',   // combining marks (the diacritics themselves)
        '@\x{00df}@u' => 'ss', // ß
        '@\x{00c6}@u' => 'AE', // Æ
        '@\x{00e6}@u' => 'ae', // æ
        '@\x{0132}@u' => 'IJ', // Ĳ
        '@\x{0133}@u' => 'ij', // ĳ
        '@\x{0152}@u' => 'OE', // Œ
        '@\x{0153}@u' => 'oe', // œ
        '@\x{00d0}@u' => 'D',  // Ð
        '@\x{0110}@u' => 'D',  // Đ
        '@\x{00f0}@u' => 'd',  // ð
        '@\x{0111}@u' => 'd',  // đ
        '@\x{0126}@u' => 'H',  // Ħ
        '@\x{0127}@u' => 'h',  // ħ
        '@\x{0131}@u' => 'i',  // ı (dotless i)
        '@\x{0138}@u' => 'k',  // ĸ
        '@\x{013f}@u' => 'L',  // Ŀ
        '@\x{0141}@u' => 'L',  // Ł
        '@\x{0140}@u' => 'l',  // ŀ
        '@\x{0142}@u' => 'l',  // ł
        '@\x{014a}@u' => 'N',  // Ŋ
        '@\x{0149}@u' => 'n',  // ŉ
        '@\x{014b}@u' => 'n',  // ŋ
        '@\x{00d8}@u' => 'O',  // Ø
        '@\x{00f8}@u' => 'o',  // ø
        '@\x{017f}@u' => 's',  // ſ (long s)
        '@\x{00de}@u' => 'T',  // Þ
        '@\x{0166}@u' => 'T',  // Ŧ
        '@\x{00fe}@u' => 't',  // þ
        '@\x{0167}@u' => 't',  // ŧ
    );

    // With one capture group and PREG_SPLIT_DELIM_CAPTURE the result
    // alternates: even indexes hold ordinary text, odd indexes hold a single
    // protected letter (Ä, Ö, ä or ö).
    $segments = preg_split(
        '@([\x{00c4}\x{00d6}\x{00e4}\x{00f6}])@u',
        $s,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );
    if (!is_array($segments)) {
        // preg_split() fails on malformed UTF-8; hand the input back as-is.
        return $original_string;
    }

    $result = '';
    foreach ($segments as $index => $segment) {
        if ($index % 2 === 1 || $segment === '') {
            // Protected letter (or nothing to do): copy verbatim.
            $result .= $segment;
            continue;
        }

        $segment = preg_replace(
            array_keys($before_decomposition),
            array_values($before_decomposition),
            $segment
        );
        if (!is_string($segment)) {
            return $original_string;
        }

        $segment = Normalizer::normalize($segment, Normalizer::FORM_D);
        if (!is_string($segment)) {
            return $original_string;
        }

        $segment = preg_replace(
            array_keys($after_decomposition),
            array_values($after_decomposition),
            $segment
        );
        if (!is_string($segment)) {
            return $original_string;
        }

        $result .= $segment;
    }

    // Nothing usable left (e.g. the input consisted only of combining
    // marks): fall back to the untouched input.
    if (empty($result)) {
        return $original_string;
    }

    $normalized = Normalizer::normalize($result, Normalizer::FORM_KC);

    return is_string($normalized) ? $normalized : $original_string;
}
// Render the shared RefWorks format partial before the remaining fields.
echo $this->partial('RecordDriver/SolrDefault/export-refworks-format.phtml');

// Formats handed in by a driver-specific template take precedence over the
// driver's own list.
if (isset($this->overrideFormats)) {
    $formats = $this->overrideFormats;
} else {
    $formats = $this->driver->tryMethod('getFormats');
}

// Use the second format entry when it is non-empty, otherwise the first, and
// split it on "/": segment 1 is the main format, segment 2 the subformat.
$format = '';
$subformat = '';
if (is_array($formats) && !empty($formats[0])) {
    $formatPath = empty($formats[1]) ? $formats[0] : $formats[1];
    $segments = explode('/', $formatPath);
    if (isset($segments[1])) {
        $format = $segments[1];
    }
    if (isset($segments[2])) {
        $subformat = $segments[2];
    }
}

// Anything not recognised below stays 'Generic'.
$refworksFormat = 'Generic';
$author = $this->driver->tryMethod('getPrimaryAuthor');
$series = $this->driver->tryMethod('getSeries');

// Main formats that map to a single RefWorks type regardless of subformat.
$directTypes = [
    'WorkOfArt' => 'Artwork',
    'Sound' => 'Sound Recording',
    'Video' => 'Video/ DVD',
    'Map' => 'Map',
];

if (isset($directTypes[$format])) {
    $refworksFormat = $directTypes[$format];
} elseif ($format == 'Book') {
    if ($subformat == 'BookSection') {
        $refworksFormat = 'Book, Section';
    } else {
        // A book without a primary author is treated as an edited volume.
        $refworksFormat = empty($author) ? 'Book, Edited' : 'Book, Whole';
    }
} elseif ($format == 'Other') {
    if ($subformat == 'Software') {
        $refworksFormat = 'Computer Program';
    } elseif ($subformat == 'ConferenceProceeding') {
        $refworksFormat = 'Conference Proceedings';
    }
} elseif ($format == 'Journal') {
    if (in_array($subformat, ['eArticle', 'eSerial', 'eJournal'], true)) {
        $refworksFormat = 'Journal, Electronic';
    } elseif ($subformat == 'Newspaper') {
        $refworksFormat = 'Newspaper Article';
    } else {
        $refworksFormat = 'Journal Article';
    }
} elseif ($format == 'Database') {
    if ($subformat == 'ResearchReport') {
        $refworksFormat = 'Report';
    }
} elseif ($format == 'Thesis') {
    // A thesis that belongs to a series is exported as a whole book.
    $refworksFormat = empty($series) ? 'Dissertation/Thesis' : 'Book, Whole';
}
// Title: strip trailing spaces and slashes from the title BEFORE
// transliterating it. The character list is an argument of rtrim(); it used
// to be passed to removeAccents() by mistake, where it was silently ignored,
// so the trailing " /" was never removed.
$title = removeAccents(rtrim($this->driver->getTitle(), " /"));
echo "T1 $title\n";
// Series: export only the entry whose key is below 1 (i.e. the first one).
// An entry is either a plain string or an array with 'name' and an optional
// 'number'.
$series = $this->driver->tryMethod('getSeries');
if (is_array($series)) {
    foreach ($series as $position => $entry) {
        if (!($position < 1)) {
            continue;
        }
        if (!is_array($entry)) {
            echo 'T2 ' . $entry . "\n";
            continue;
        }
        echo 'T2 ' . $entry['name'] . "\n";
        if (!empty($entry['number'])) {
            echo 'VO ' . $entry['number'] . "\n";
        }
    }
}
// Container (host item) details are only exported when a container title
// exists.
$containerTitle = $this->driver->tryMethod('getContainerTitle');
if (!empty($containerTitle)) {
    // Records whose last format is a book section get the T2 tag for the
    // container title; everything else gets JF.
    $formats = $this->driver->getFormats();
    $containerTag = end($formats) == '1/Book/BookSection/' ? 'T2' : 'JF';
    echo $containerTag . ' ' . $containerTitle . "\n";

    // Volume and issue, each printed only when non-empty.
    $numbering = [
        'getContainerVolume' => 'VO',
        'getContainerIssue' => 'IS',
    ];
    foreach ($numbering as $method => $tag) {
        $value = $this->driver->tryMethod($method);
        if (!empty($value)) {
            echo $tag . ' ' . $value . "\n";
        }
    }

    // The end page is only looked up when a start page is present.
    $startPage = $this->driver->tryMethod('getContainerStartPage');
    if (!empty($startPage)) {
        echo 'SP ' . $startPage . "\n";
        $endPage = $this->driver->tryMethod('getContainerEndPage');
        if (!empty($endPage)) {
            echo 'OP ' . $endPage . "\n";
        }
    }
}
// Let's see if we need the page number a second time in
// some records.
// if ($extents = $this->driver->tryMethod('getExtent')) {
// foreach (array_unique($extents) as $extent) {
// echo "OP $extent\n";
// }
//}
// Authors: use the search form of the primary author and fall back to the
// plain primary author when the search form is empty.
$author = $this->driver->tryMethod('getPrimaryAuthorForSearch');
if (empty($author)) {
    $author = $this->driver->tryMethod('getPrimaryAuthor');
}

// Finnish role terms that are replaced with a space in author names.
$function_terms_to_replace = [
    "kirjoittaja",
    "toimittaja",
    "kääntäjä",
    "säveltäjä",
];

// Builds one "A1" line: role terms blanked out, then transliterated.
$authorLine = function ($name) use ($function_terms_to_replace) {
    $withoutRoles = str_ireplace($function_terms_to_replace, ' ', $name);
    return "A1 " . removeAccents($withoutRoles) . "\n";
};

// The primary author is left out when the record is exported as an edited
// book.
if ($refworksFormat != 'Book, Edited' && !empty($author)) {
    echo $authorLine($author);
}

// Secondary authors are exported with the same A1 tag.
$secondaryAuthors = $this->driver->tryMethod('getSecondaryAuthors', [true]);
if (is_array($secondaryAuthors)) {
    foreach ($secondaryAuthors as $secondaryAuthor) {
        echo $authorLine($secondaryAuthor);
    }
}
// Languages: one LA line per distinct language value.
$languages = $this->driver->tryMethod('getLanguages');
if (is_array($languages)) {
    $distinctLanguages = array_unique($languages);
    foreach ($distinctLanguages as $language) {
        echo 'LA ' . $language . "\n";
    }
}
// Publication details: place (PP), publisher (PB) and year (YR). The three
// lists are walked in parallel up to the length of the longest one, so every
// access has to be guarded individually.
$pubPlaces = $this->driver->tryMethod('getPlacesOfPublication');
$pubDates = $this->driver->tryMethod('getPublicationDates');
$pubNames = $this->driver->tryMethod('getPublishers');
if (is_array($pubPlaces) && is_array($pubDates) && is_array($pubNames)) {
    // max() already covers the "dates only" case, so no extra adjustment of
    // $total is needed.
    $total = max(count($pubPlaces), count($pubDates), count($pubNames));
    for ($i = 0; $i < $total; $i++) {
        if (isset($pubPlaces[$i])) {
            // Drop square brackets and trailing ":", "," and spaces.
            echo "PP " . removeAccents(rtrim(str_replace(array('[', ']'), '', $pubPlaces[$i]), ':, ')). "\n";
        }
        if (isset($pubNames[$i])) {
            // Remove any ":"-introduced part that mentions "jakaja"
            // (Finnish for distributor).
            $pubName = preg_replace('/\s*:[^;:]*\bjakaja\b[^;:]*/', '', $pubNames[$i]);
            if ($pubName) {
                echo "PB " . removeAccents(rtrim($pubName, ", ")) . "\n";
            }
        }
        // The date list may be shorter than the others; reading a missing
        // index used to raise an "undefined offset" notice/warning.
        if (isset($pubDates[$i])) {
            $date = trim($pubDates[$i], '[]. ');
            // Anything longer than four characters is reduced to its year.
            if (strlen($date) > 4) {
                $date = $this->dateTime()->extractYear($date);
            }
            if ($date) {
                echo "YR $date\n";
            }
        }
    }
}
// Edition, when the record has one.
$edition = $this->driver->tryMethod('getEdition');
if (!empty($edition)) {
    echo 'ED ' . $edition . "\n";
}

// URLs: list the record's own URLs, or fall back to a link to the record
// page when there are none.
$urls = $this->record($this->driver)->getUrlList();
if (!$urls) {
    echo "UL " . $this->serverUrl($this->recordLink()->getUrl($this->driver)) . "\n";
} else {
    foreach ($urls as $link) {
        echo 'UL ' . $link . "\n";
    }
}
// Summary: one AB line per summary entry.
$summary = $this->driver->tryMethod('getSummary');
if (is_array($summary)) {
    foreach ($summary as $abstract) {
        echo 'AB ' . $abstract . "\n";
    }
}

// Page count: take the first physical description containing digits
// followed by "s" or "p" (optionally with a full stop) and stop there.
$physical = $this->driver->tryMethod('getPhysicalDescriptions');
if (is_array($physical)) {
    foreach ($physical as $description) {
        if (!preg_match('/([0-9]+\s*[sp]\b\.?)/', $description, $matches)) {
            continue;
        }
        echo 'OP ' . $matches[1] . "\n";
        break;
    }
}
// Prints one "<tag> <value>" line per element; anything that is not an array
// is ignored.
$printTagged = function ($tag, $values) {
    if (!is_array($values)) {
        return;
    }
    foreach ($values as $value) {
        echo $tag . ' ' . $value . "\n";
    }
};

// General notes, followed by the dissertation note when there is one.
$printTagged('NO', $this->driver->tryMethod('getGeneralNotes'));
$dissertationNote = $this->driver->tryMethod('getDissertationNote');
if ($dissertationNote) {
    echo 'NO ' . $dissertationNote . "\n";
}

// ISBNs and ISSNs share the SN tag.
$printTagged('SN', $this->driver->tryMethod('getISBNs'));
$printTagged('SN', $this->driver->tryMethod('getISSNs', [true]));
// Subject headings: one K1 line per heading. A heading is either a string or
// an array of parts; parts are transliterated, stripped of trailing dots and
// spaces, and joined with " : ".
$subjects = $this->driver->tryMethod('getAllSubjectHeadings');
if (is_array($subjects)) {
    foreach ($subjects as $heading) {
        if (is_array($heading)) {
            $parts = [];
            foreach ($heading as $partKey => $part) {
                $parts[$partKey] = rtrim(removeAccents($part), '. ');
            }
            $heading = implode(' : ', $parts);
        } else {
            $heading = removeAccents(rtrim($heading, '. '));
        }
        // The assembled heading is transliterated once more before output.
        echo 'K1 ' . removeAccents($heading) . "\n";
    }
}
// DOI, when the driver provides one.
$doi = $this->driver->tryMethod('getCleanDOI');
if ($doi) {
    echo 'DO ' . $doi . "\n";
}

// An empty line terminates the record.
echo "\n";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment