Skip to content

Instantly share code, notes, and snippets.

@zuphilip
Created September 14, 2014 16:44
Show Gist options
  • Save zuphilip/0264f00aa77f8552b074 to your computer and use it in GitHub Desktop.
Save zuphilip/0264f00aa77f8552b074 to your computer and use it in GitHub Desktop.
CSL styles: Delete more unnecessary et-al-subsequent statements
#!/usr/bin/perl
# IMPORTANT: encoding of this file is utf-8 and line ends are unix style
use strict;
use warnings;
use XML::LibXML;
use File::Basename;
use utf8;
#setting the correct encoding for the command line in Windows
#binmode(STDOUT , ":encoding(cp437)" );
#binmode(STDOUT);
##################################################
# Options: configure how the script is performed #
##################################################
# Directory (with trailing slash) whose *.csl files are examined.
my $inputPath = './styles-for-small-tests/';
#my $inputPath = './styles/';
# Counters reported in the summary line at the end of the run.
my $numberFixed = 0;    # nodes whose et-al-subsequent-* attributes were removed
my $numberTotal = 0;    # nodes carrying at least one et-al-subsequent-* attribute
##################################################
#
##################################################

# Return the numeric value of an attribute string.
# A missing attribute arrives here as '' and counts as 0, exactly as Perl's
# implicit numification would treat it, but without emitting an
# "Argument "" isn't numeric" warning for every such node.
sub numericValue {
    my ($text) = @_;
    no warnings 'numeric';
    return $text + 0;
}

open(my $logOutput, '>:encoding(UTF-8)', 'output.log')
    or die "Could not open 'output.log' for writing $!";

# One parser instance is reused for every file.
my $parser = XML::LibXML->new();

my @XmlFileList = glob($inputPath . '*.csl');
foreach my $filename (@XmlFileList) {
    my $currentDoc = $parser->parse_file($filename);
    my $xc = XML::LibXML::XPathContext->new($currentDoc);
    $xc->registerNs('csl', 'http://purl.org/net/xbiblio/csl');
    my $docChanged = 0;

    # Every element that carries at least one of the two "subsequent" attributes.
    my @currentNodes = $xc->findnodes('//*[@et-al-subsequent-min or @et-al-subsequent-use-first]');
    foreach my $node (@currentNodes) {
        # Defined-or keeps a literal "0" attribute visible in the log;
        # only a truly absent attribute becomes the empty string.
        my $etAlMin                = $node->getAttribute('et-al-min')                  // '';
        my $etAlUseFirst           = $node->getAttribute('et-al-use-first')            // '';
        my $etAlSubsequentMin      = $node->getAttribute('et-al-subsequent-min')       // '';
        my $etAlSubsequentUseFirst = $node->getAttribute('et-al-subsequent-use-first') // '';
        $numberTotal++;

        # Collect all remarks so that one finding never hides another.
        my @remarks;

        # The subsequent settings merely repeat the normal ones: drop them.
        if (   numericValue($etAlMin) == numericValue($etAlSubsequentMin)
            && numericValue($etAlUseFirst) == numericValue($etAlSubsequentUseFirst) )
        {
            $docChanged = 1;
            $numberFixed++;
            $node->removeAttribute('et-al-subsequent-min');
            $node->removeAttribute('et-al-subsequent-use-first');
            push @remarks, 'subsequent not needed';
        }

        # Report only (no change): use-first exceeds min for subsequent cites.
        if ( numericValue($etAlSubsequentUseFirst) > numericValue($etAlSubsequentMin) ) {
            push @remarks, 'et-al-subsequent-use-first > et-al-subsequent-min';
        }

        my $headerLine = "\n$filename"
            . ( @remarks ? ' --> ' . join( '; ', @remarks ) : '' ) . "\n";
        my $nodeName = $node->nodeName;

        print {$logOutput} $headerLine,
            " $nodeName\n $etAlMin/$etAlUseFirst (et-al-min/et-al-use-first)\n $etAlSubsequentMin/$etAlSubsequentUseFirst (et-al-subsequent-min/et-al-subsequent-use-first)\n";
        print $headerLine,
            " $nodeName\n $etAlMin/$etAlUseFirst\n $etAlSubsequentMin/$etAlSubsequentUseFirst\n";
    }

    if ($docChanged) {
        # toString on a document yields already-encoded bytes, so write them
        # through a raw handle: no encoding layer and no newline translation.
        # NOTE(review): the serialised document may differ from the input in
        # more than the removed attributes (e.g. character references) -
        # check the resulting diff before committing.
        open(my $xmlFileHandle, '>:raw', $filename)
            or die "Could not open $filename for writing $!";
        print {$xmlFileHandle} $currentDoc->toString
            or die "Could not write to $filename $!";
        close $xmlFileHandle
            or die "Could not close $filename $!";
    }
}

print "\n\n==> $numberFixed fixable, $numberTotal total\n\n";
# Buffered write errors only surface at close, so check it.
close $logOutput or die "Could not close 'output.log' $!";
./styles/academy-of-management-review.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/american-journal-of-agricultural-economics.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/american-journal-of-political-science.csl
citation
4/1 (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/american-sociological-association.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-5th-edition.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-annotated-bibliography.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-cv.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-fr-provost.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-fr-universite-de-montreal.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-no-doi-no-issue.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-single-spaced.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa-tr.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/apa.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/associacao-nacional-de-pesquisa-e-ensino-em-transportes.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/avian-pathology.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/biological-journal-of-the-linnean-society.csl
style
4/3 (et-al-min/et-al-use-first)
3/2 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/biological-journal-of-the-linnean-society.csl
name
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/biological-reviews.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/biometrics.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/british-ecological-society.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/campus-adventiste-du-saleve-faculte-adventiste-de-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/campus-adventiste-du-saleve-faculte-adventiste-de-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/campus-adventiste-du-saleve-faculte-adventiste-de-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
1/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/campus-adventiste-du-saleve-faculte-adventiste-de-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/campus-adventiste-du-saleve-faculte-adventiste-de-theologie.csl
citation
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/centaurus.csl
citation
4/1 (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/decision-sciences.csl
style
/ (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/deutsche-gesellschaft-fur-psychologie.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/die-bachelorarbeit-samac-et-al-in-text.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/die-bachelorarbeit-samac-et-al-note.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/freshwater-biology.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/harvard-anglia-ruskin-university.csl
citation
5/1 (et-al-min/et-al-use-first)
4/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/harvard-london-south-bank-university.csl
citation
3/1 (et-al-min/et-al-use-first)
1/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/harvard-london-south-bank-university.csl
bibliography
6/6 (et-al-min/et-al-use-first)
1/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/harvard-pontificia-universidad-catolica-del-ecuador.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/harvard-the-university-of-melbourne.csl --> et-al-subsequent-use-first > et-al-subsequent-min
citation
4/ (et-al-min/et-al-use-first)
/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/human-reproduction.csl
name
/10 (et-al-min/et-al-use-first)
8/ (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/human-resource-management-journal.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/international-studies-association.csl
citation
7/6 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/irish-historical-studies.csl
citation
7/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/journal-of-animal-physiology-and-animal-nutrition.csl
style
2/2 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/journal-of-consumer-research.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/journal-of-fish-diseases.csl
citation
99/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/journal-of-management.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/journal-of-the-association-for-information-systems.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/journal-of-zoology.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/kindheit-und-entwicklung.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/language-in-society.csl
citation
5/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/les-journees-de-la-recherche-avicole.csl
citation
2/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/les-journees-de-la-recherche-porcine.csl
citation
2/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/lettres-et-sciences-humaines-fr.csl
name
4/3 (et-al-min/et-al-use-first)
4/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/lettres-et-sciences-humaines-fr.csl
name
4/3 (et-al-min/et-al-use-first)
4/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/lluelles-lexisnexis.csl
citation
/ (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/lluelles.csl
citation
/ (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/meteoritics-and-planetary-science.csl
citation
10/1 (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/political-studies.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/seminaire-saint-sulpice-ecole-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/seminaire-saint-sulpice-ecole-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/seminaire-saint-sulpice-ecole-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
1/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/seminaire-saint-sulpice-ecole-theologie.csl
name
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/seminaire-saint-sulpice-ecole-theologie.csl
citation
3/3 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/spanish-legal.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/strategic-management-journal.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/the-journal-of-hellenic-studies.csl
citation
3/1 (et-al-min/et-al-use-first)
1/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/thomson-reuters-legal-tax-and-accounting-australia.csl
citation
20/19 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/traces.csl
citation
4/1 (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/universite-de-sherbrooke-faculte-d-education.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/urban-studies.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/user-modeling-and-user-adapted-interaction.csl
citation
3/1 (et-al-min/et-al-use-first)
2/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/vienna-legal.csl
citation
6/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/yeast.csl
citation
10/1 (et-al-min/et-al-use-first)
6/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/zookeys.csl
citation
2/2 (et-al-min/et-al-use-first)
2/1 (et-al-subsequent-min/et-al-subsequent-use-first)
./styles/zoological-journal-of-the-linnean-society.csl
citation
4/1 (et-al-min/et-al-use-first)
3/1 (et-al-subsequent-min/et-al-subsequent-use-first)
@zuphilip
Copy link
Author

I had to undo the replacements of character encodings which the script performs, e.g. replace "-" back to "& # 8 2 1 1 ;". However, this may be handled in the same manner as in this python script, i.e. replace line 79 with

my $currentDocString = $currentDoc->toString;
$_ =~ s/([a-zA-Z])-([a-z])/$1$2/g;
$currentDoc =~ s/ / /g; #no-break space
$currentDoc =~ s//ᵉ/g;
$currentDoc =~ s//‑/g; #non-breaking hyphen
$currentDoc =~ s//–/g; #en dash
$currentDoc =~ s//—/g; #em dash
$currentDoc =~ s// /g;#narrow no-break space
print $xmlFileHandle $currentDocString;

This code is not yet tested, and the regexps in particular might need some unescaping. Moreover, it is critical here that not too much of the XML is replaced (don't we have to restrict this to attribute values?).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment