Skip to content

Instantly share code, notes, and snippets.

@mjlassila
Last active December 26, 2015 10:09
Show Gist options
  • Save mjlassila/7134622 to your computer and use it in GitHub Desktop.
Save mjlassila/7134622 to your computer and use it in GitHub Desktop.
Creates a filtered RSS feed of new items added to the Jyväskylä University Library catalog.
#!/m1/oracle/app/oracle/product/11.2.0/db_1/perl/bin/perl
# This CGI program processes the tab-separated flat file created by
# M. Doran's newbooks system (newBooks.txt) and prints it as an RSS 2.0 feed.
# Author of this program: W. Grotophorst, (c) 2005, Lost Packet Planet.
# Program may be freely copied, modified & improved.
# Support for book covers and feed filtering by mjlassila, 2013.
#
# Only records whose line matches one of the wanted locations and whose
# cover image at booky.fi is not a known placeholder are included, up to
# $NumToFeed items.
#########################################
use strict;
use warnings;
use open qw/:std :utf8/;
use utf8;
require LWP::UserAgent;
use List::Util qw/first/;

#
# less variable variables
#
my $fromlink   = "https://jykdok.linneanet.fi/";
my $inputfile  = "full-path-to-newbooks/newBooks.txt";
my $NumToFeed  = 8;
my $URL2Jykdok = "https://jyu.finna.fi/Record/jykdok.";
#########################################
my $titlestr = "Uutuudet - JYKDOK";

# The cover is probed over http but published in the feed over https.
my $cover_probe_url = 'http://www.booky.fi/image.php?id=';
my $cover_feed_url  = 'https://www.booky.fi/image.php?id=';

# If the size of the small preview image is 1758, 1926 or 4774 bytes, it is
# Booky's default "cover coming soon" image; such records are skipped.
my %is_placeholder_size = map { $_ => 1 } qw/1758 1926 4774/;

# Locations whose records are fed.  The dots are unescaped, so they match
# any single character.
my $wanted_location = qr/Pääkirjasto. (?:Kirjat|Kurssikirjat)./;

# Escape the five XML special characters in $text and return the result.
# An undefined value is treated as the empty string.
sub xml_escape {
    my ($text) = @_;
    return '' unless defined $text;

    # '&' must be replaced first; otherwise the ampersands introduced by
    # the other entities would themselves be escaped (e.g. "&amp;lt;").
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;
    return $text;
}

# Fetch the cover image for $isbn with the user agent $ua and return its
# size in bytes.  Falls back to the length of the response body when the
# server sends no Content-Length header, and to 0 when the request fails
# (0 is the conventional RSS enclosure length for "unknown").
sub cover_size {
    my ($ua, $isbn) = @_;

    my $response = $ua->get($cover_probe_url . $isbn);
    return 0 unless $response->is_success;

    my $size = $response->header('Content-Length');
    $size = length($response->content) unless defined $size;
    return $size;
}

open my $reader, '<', $inputfile
    or die "RSS error: cannot open $inputfile: $!";
binmode STDOUT, ':utf8';

# Declare the charset in the HTTP header as well: the body is UTF-8.
print "Content-type: text/xml; charset=UTF-8\n\n";
print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
print "<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\" xmlns:media=\"http://search.yahoo.com/mrss/\">\n";
print "<channel>\n";
print "<atom:link href=\"https://jykdok.linneanet.fi/cgi-bin/newbooks-rss.cgi\" rel=\"self\" type=\"application/rss+xml\" />";
print "<title>$titlestr</title>\n";
print "<link>$fromlink</link>\n";
print "<description>Jyväskylän yliopiston kirjaston uudet hankinnat.</description>\n";

# A short timeout keeps one unresponsive cover lookup from stalling the feed.
my $useragent = LWP::UserAgent->new(timeout => 10);
my %seen_isbn;
my $numfed = 0;

LINE:
while (my $line = <$reader>) {
    # Stop reading as soon as the feed is full.
    last LINE if $numfed >= $NumToFeed;

    chomp $line;
    next LINE unless $line =~ $wanted_location;

    # Field 2 of the record is not used by the feed.
    my @itemdata = split /\t/, $line, -1;
    my ($bibid, $isbn, $author, $title, $publ, $location, $call)
        = @itemdata[0, 1, 3 .. 7];

    $isbn = '' unless defined $isbn;
    $isbn =~ tr/-//d;

    # The cover is looked up by ISBN, so a record without one cannot be
    # checked.  Several copies of the same volume produce several records;
    # only the first record of each ISBN is considered.
    next LINE if $isbn eq '' or $seen_isbn{$isbn}++;

    my $total_size = cover_size($useragent, $isbn);
    next LINE if $is_placeholder_size{$total_size};

    # The short title is everything before the first "/" or ":".
    $title = '' unless defined $title;
    my $short_title = (split /\s*\/|\s*\:/, $title)[0];

    my $record_url  = $URL2Jykdok . xml_escape($bibid);
    my $cover_url   = xml_escape($cover_feed_url . $isbn . '&frontpage=true');
    my $description = join ' ',
        map { xml_escape($_) } $author, $title, $publ, $location, $call;

    $numfed++;
    print "<item>\n";
    print "<title>", xml_escape($short_title), "</title>\n";
    print "<link>$record_url</link>\n";
    print "<description>$description</description>\n";
    print "<guid>$record_url</guid>\n";
    print "<media:thumbnail url=\"$cover_url\"/>\n";
    print "<enclosure type=\"image/jpeg\" length=\"$total_size\" url=\"$cover_url\"/>\n";
    print "</item>\n";
}

print "</channel>\n</rss>\n";
close $reader;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment