Skip to content

Instantly share code, notes, and snippets.

@sharifulin
Created June 18, 2009 10:12
Show Gist options
  • Save sharifulin/131836 to your computer and use it in GitHub Desktop.
Save sharifulin/131836 to your computer and use it in GitHub Desktop.
Parse RSS Yandex Search
#!/usr/bin/perl
use utf8;
use strict;
use lib '../../lib';
use common;
use XML::Feed;
use Storable qw(nstore retrieve);
use Data::Dumper;
# Storable file that remembers which entry links were already inserted.
my $store_file = 'blog.store';

# Links seen so far (link => number of sightings).
# Passing any command-line argument starts from an empty set and also skips
# saving it at the end of the run. A missing or unreadable store file is
# treated as "nothing seen yet" instead of relying on autovivification.
my $seen = {};
unless (@ARGV) {
    my $stored = eval { retrieve($store_file) };
    $seen = $stored if ref $stored eq 'HASH';
}

# Fetch and parse the Yandex blog-search RSS feed; abort with the parser's
# own error message if that fails.
my $feed = XML::Feed->parse(URI->new(
'http://blogs.yandex.ru/search.rss?ft=all&full=1&server=twitter.com%2C%20station20.ru%2C%20twitterlinks.ru&text=%22%D0%A1%D0%A2%D0%90%D0%9D%D0%A6%D0%98%D0%AF%202.0%22%20|%20%22station20%22&x_server=on'
)) or die XML::Feed->errstr;

# Number of rows inserted during this run. Initialised to 0 so that the final
# print reports "0" (not an empty string) when the feed has nothing new.
my $count = 0;

for my $entry ($feed->entries) {
    my $link = $entry->link;

    # Count every sighting, but insert only the first one.
    next if $seen->{$link}++;

    # The entry's own timestamp is not used (the original author notes that
    # converting $entry->issued did not work for this feed). Instead each
    # inserted row is back-dated by one more minute than the previous one:
    # 0, 1, 2, ... minutes before now().
    $DB->query(
        'insert into chat set type="blog", message=?, etc=?, created=now() - interval ? minute',
        _clean($entry->content->body),
        join('::', $entry->author, $entry->title, $link),
        $count++,
    );
}

print $count;

# Persist the seen-links set for the next run. A failure here means the next
# run would insert duplicates, so report it loudly instead of ignoring it.
unless (@ARGV) {
    nstore($seen, $store_file)
        or die "Cannot save seen links to $store_file: $!\n";
}
# Return a copy of the given string with the search-highlight markers
# (<ns0:hlword> and </ns0:hlword>) removed. The caller's value is not modified.
sub _clean($) {
    my ($text) = @_;
    $text =~ s{</?ns0:hlword>}{}sg;
    return $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment