Skip to content

Instantly share code, notes, and snippets.

@Htbaa
Created September 3, 2011 20:05
Show Gist options
  • Save Htbaa/1191706 to your computer and use it in GitHub Desktop.
Save Htbaa/1191706 to your computer and use it in GitHub Desktop.
BlitzMax Indexer
*.xml
.*
!.gitignore
Makefile*
!Makefile.PL
META.yml
MYMETA.*
blib
build
/inc
pm_to_blib
#MANIFEST*
!MANIFEST.SKIP
*.bak
!.perltidyrc
*~
*.orig
ABOUT
This is a little Perl script that indexes the www.blitzmax.com website to
generate an Atom and RSS feed containing the latest news items and forum posts.
GETTING STARTED
Perl 5.10 or higher is required.
$ git clone git://github.com/Htbaa/BlitzMax.com-Indexer.git bmxfeed
$ cd bmxfeed
$ cpanm -n --installdeps .
$ perl blitzmax_indexer.pl 1
You can also run blitzmax_indexer.pl without an argument which in effect
doesn't make the script output information, which is useful when running this
with cron.
LICENSE
This software is licensed under the MIT license. Please see the COPYING file.
#!/usr/bin/env perl
use strict;
use warnings;
use ojo;
use CHI;
use DateTime;
use XML::Feed;
use Digest::MD5 qw(md5_hex);
use File::Spec;
use v5.10;
my $debug = $ARGV[0] || 0;
my $cache = CHI->new(driver => 'File', root_dir => File::Spec->tmpdir());
my @items;
my $forum_url = 'http://blitzmax.com/Community/';
my $date_regexp =
qr/(\d+)\+? (seconds?|minutes?|hours?|weeks?|days?|months?|years?)/i;
# Forum topics
my $res = g($forum_url . '_index_.php');
$res->dom->find('div.main td.cell a')->each(
sub {
my $node = shift;
my $category = $node->text;
say $category if $debug;
my $res = g($forum_url . $node->{href});
$res->dom->find('div.main td.cell a')->each(
sub {
my $node = shift;
say "\t" . $node->text if $debug;
my $td = $node->parent->parent->find('td.cell');
my $content = $cache->compute(
$node->{href},
undef,
sub {
my $res = g($forum_url . $node->{href});
return $res->dom->find('td.posttext:first-child')
->first;
}
);
my $data = {
category => $category,
url => $forum_url . $node->{href},
title => $node->text,
author => $td->[1]->text,
date => do {
my $text_node = $td->[3]->find('font.tiny')->first;
return unless $text_node;
my $date_str = $text_node->text;
my ($number, $type) = ($date_str =~ $date_regexp);
my $date = DateTime->now->clone;
if ($type) {
$type .= 's' unless substr($type, -1, 1) eq 's';
$date = $date->subtract($type => $number);
}
else {
warn "Unknown date type: ", $date_str;
}
$date;
},
summary => substr($content->text, 0, 200),
content => $content->content_xml,
};
push @items, $data;
}
);
}
);
# Frontpage news items
say "Frontpage" if $debug;
$res = g('http://blitzmax.com/Home/_index_.php');
$res->dom->find('div.main table')->each(
sub {
my $node = shift;
my $post_head = $node->find('td.posthead')->first;
my $post_text = $node->find('td.posttext')->first;
my $data = {};
if ($post_head && $post_text) {
$data->{category} = 'Frontpage';
$data->{url} = 'http://blitzmax.com';
$data->{title} = $post_head->find('b')->first->text;
say "\t", $data->{title} if $debug;
($data->{author}, $data->{date}) = do {
my $content = $post_head->find('td')->first->all_text;
my ($author, $date_time_str) =
$content =~ m/submitted by (.+) on (.+)\)/mg;
my ($date_str, $time_str) = split /\s/, $date_time_str;
my ($year, $month, $day) = split /-/, $date_str;
my ($hour, $minute, $second) = split /:/, $time_str;
my $datetime = DateTime->new(
year => $year,
month => $month,
day => $day,
hour => $hour,
minute => $minute,
second => $second
);
($author, $datetime);
};
$data->{content} = $post_text->content_xml;
$data->{summary} = substr($post_text->all_text, 0, 200);
push @items, $data;
}
}
);
my @sorted_items = sort { $b->{date} cmp $a->{date} } @items;
my @sliced_items = @sorted_items[0 .. 50];
foreach (qw/RSS Atom/) {
say 'Generating ', $_, ' feed' if $debug;
my $feed = XML::Feed->new($_);
$feed->title('BlitzMax ' . $_ . ' Feed');
$feed->link('http://blitzmax.com');
$feed->description('Blitzmax.com feed containing news and forum topics');
foreach (@sliced_items) {
next
unless (defined($_->{url})
&& defined($_->{title})
&& defined($_->{category}));
my $entry = XML::Feed::Entry->new();
$entry->id(md5_hex($_->{url} . $_->{title}));
$entry->link($_->{url});
$entry->title(sprintf('[%s] %s', $_->{category}, $_->{title}));
$entry->summary($_->{summary} . '...');
$entry->content($_->{content});
$entry->issued($_->{date});
$entry->modified($_->{date});
$entry->author($_->{author});
$feed->add_entry($entry);
}
open my $fh, '>', lc($_) . '.xml';
print $fh $feed->as_xml;
close $fh;
}
Copyright (c) 2011-2013 Christiaan Kras
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
use strict;
use warnings;
use ExtUtils::MakeMaker;
WriteMakefile(
NAME => 'BlitzMaxComIndexer',
AUTHOR => 'Christiaan Kras',
VERSION => '1.00',
LICENSE => 'mit',
ABSTRACT =>
'An indexer for the BlitzMax.com website to generate an Atom and RSS '
. 'feed.',
MIN_PERL_VERSION => '5.010001',
PREREQ_PM => {
'CHI' => '0.56',
'DateTime' => '0.78',
'XML::Feed' => '0.51',
'Digest::MD5' => '2.52',
'File::Spec' => '3.33',
'Mojolicious' => '3.72',
},
META_MERGE => {
resources =>
{repository => 'https://github.com/Htbaa/BlitzMax.com-Indexer',}
},
);
@Htbaa
Copy link
Author

Htbaa commented Jan 6, 2013

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment