Skip to content

Instantly share code, notes, and snippets.

@satojkovic
Created September 7, 2010 16:55
Show Gist options
  • Save satojkovic/568655 to your computer and use it in GitHub Desktop.
Save satojkovic/568655 to your computer and use it in GitHub Desktop.
#! /usr/bin/perl
use strict;
use warnings;
use XML::RSS;
use LWP::Simple;
use URI;
use Encode;
## Read the list of feed URLs from a text file.
##
## Parameters:
##   $fn - path of a file holding one feed URL per line.
## Returns:
##   Reference to an array of the URLs, in file order. Leading/trailing
##   whitespace (including the CR of a CRLF line ending) is trimmed and
##   blank lines are skipped.
## Dies:
##   If the file cannot be opened; the message names the file and the
##   operating-system error.
sub get_feed_url {
    my ($fn) = @_;

    open(my $fh, "<", $fn) or die "Can't open $fn: $!";

    # Read the list file line by line.
    my @feedlist;
    while (my $line = <$fh>) {
        # Trim whitespace on both ends; this also removes the newline and
        # any stray carriage return.
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;

        # An empty entry is not a URL and would only make the fetch fail.
        next if $line eq '';

        push @feedlist, $line;
    }
    close $fh;

    return \@feedlist;
}
# Load the feed URLs from the list file (relative path, resolved against
# the current working directory).
my $feedlist = get_feed_url('../feedlist.txt');
## Debug check: uncomment to print every URL that was loaded.
# my $cnt = @{$feedlist};
# for(my $i=0; $i<$cnt; $i++) {
# print $feedlist->[$i] . "\n";
# }
## Print the title and the tag-stripped description of one feed item.
##
## Parameters:
##   $item - hash reference for a feed item; its `title` and `description`
##           entries are read. A missing/undefined entry is treated as an
##           empty string.
## Output:
##   Writes "| <title>\n<description>\n\n", UTF-8 encoded, to the currently
##   selected output handle.
## Returns:
##   The result of printf (true on success).
## NOTE(review): despite the name, no words are extracted or returned yet;
## the sub only prints the cleaned text.
sub get_words {
    my ($item) = @_;

    # Default missing fields so neither the substitution nor printf warns
    # about uninitialized values.
    my $title = defined $item->{title}       ? $item->{title}       : '';
    my $body  = defined $item->{description} ? $item->{description} : '';

    # Remove markup tags. The negated class (unlike `.`) also matches a
    # newline, so a tag that is wrapped over several lines is stripped too.
    $body =~ s/<[^>]*>//g;

    # Diagnostic output.
    return printf("| %s\n%s\n\n", encode_utf8($title), encode_utf8($body));
}
## Download one RSS feed and pass each of its items to get_words.
##
## Parameters:
##   $url - address of the feed to download.
## Returns:
##   Nothing (undef in scalar context, empty list in list context).
##   NOTE(review): no word counts are computed yet, despite the name.
## Dies:
##   With "Can't GET <url>" when the download fails.
##   NOTE(review): errors raised by XML::RSS->parse on malformed XML
##   presumably propagate to the caller as well - confirm.
sub get_word_counts {
    my ($url) = @_;

    # Download the feed. LWP::Simple::get signals failure with undef, so
    # test definedness rather than truthiness.
    my $xml = get(URI->new($url));
    die "Can't GET $url\n" unless defined $xml;

    # Parse the feed.
    my $rss = XML::RSS->new();
    $rss->parse($xml);

    # Process every item of the feed.
    for my $item (@{ $rss->{'items'} }) {
        get_words($item);
    }

    # Explicit return: a sub whose last statement is a loop would otherwise
    # hand back an unspecified value.
    return;
}
## Visit every URL in the feed list and process the corresponding feed.
##
## $words is a code reference taking one argument:
##   a reference to an array of feed URLs.
## It calls get_word_counts once per URL, in list order, and returns
## nothing.
my $words = sub {
    # Named differently from the file-level $feedlist so the outer
    # variable is not shadowed.
    my ($urls) = @_;

    for my $url (@{$urls}) {
        get_word_counts($url);
    }

    return;
};
$words->($feedlist);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment