Created
April 29, 2012 00:46
-
-
Save progd/2522934 to your computer and use it in GitHub Desktop.
FilterFullFeed (Automatic Ruby plugin)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
require 'json'
require 'open-uri'
require 'nokogiri'

module Automatic::Plugin
  # Pipeline filter that replaces each feed item's description with content
  # extracted from the page the item links to.
  #
  # Extraction rules ("siteinfo") are loaded once, at construction time, from
  # the location given by config['siteinfo']. Each rule is expected to look
  # like { 'data' => { 'type' => ..., 'url' => <pattern>, 'xpath' => ... } }.
  class FilterFullFeed
    # Accepted siteinfo types. The position in this list is also the match
    # priority: earlier types are tried first, more generic ones last.
    SITEINFO_TYPES = %w[SBM INDIVIDUAL IND SUBGENERAL SUB GENERAL GEN].freeze

    # Same "scheme://" test open-uri uses to decide that a string is a URL.
    REMOTE_URL = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

    # config   - Hash-like settings; 'siteinfo' is a file path or URL to the
    #            siteinfo JSON document.
    # pipeline - collection of feed objects (entries may be nil); each non-nil
    #            entry must respond to #items.
    def initialize(config, pipeline=[])
      @config = config
      @pipeline = pipeline
      @siteinfo = get_siteinfo
    end

    # Loads the siteinfo list, drops entries whose type is not one of
    # SITEINFO_TYPES, and orders the rest by type priority.
    #
    # Returns an Array of siteinfo Hashes.
    def get_siteinfo
      location = @config['siteinfo']
      Automatic::Log.puts(:info, "Loading siteinfo from #{location}")

      # JSON.parse (rather than JSON.load) never instantiates arbitrary
      # classes from the document, which matters for downloaded data.
      entries = JSON.parse(read_siteinfo(location))

      # Exclude entries with a missing or unknown type.
      known = entries.select { |info| SITEINFO_TYPES.include?(siteinfo_type(info)) }

      # Push the more generic types toward the end of the array. The original
      # position is the tie-breaker so entries of the same type keep the order
      # they had in the source document.
      known.sort_by.with_index do |info, position|
        [SITEINFO_TYPES.index(siteinfo_type(info)), position]
      end
    end

    # Replaces feed.description with the HTML selected by the first siteinfo
    # rule whose 'url' pattern matches feed.link. The feed is left untouched
    # when it has no link or when no rule matches.
    #
    # Returns the (possibly modified) feed.
    # Network and URL-parsing errors from fetching the page propagate.
    def fulltext(feed)
      link = feed.link
      return feed unless link

      rule = @siteinfo.find { |info| link.match(info['data']['url']) }
      unless rule
        Automatic::Log.puts(:info, "Fulltext SITEINFO not found: #{link}")
        return feed
      end

      data = rule['data']
      Automatic::Log.puts(:info, "Siteinfo matched: #{data['url']}")
      body = fetch_html(link).xpath(data['xpath'])
      feed.description = body.to_html.encode('UTF-8', undef: :replace)
      feed
    end

    # Runs #fulltext over every item of every non-nil feed in the pipeline.
    #
    # Returns a new Array holding the pipeline's entries (nil entries are
    # passed through unchanged); items are updated in place.
    def run
      @pipeline.map do |feeds|
        feeds.items.each { |feed| fulltext(feed) } unless feeds.nil?
        feeds
      end
    end

    private

    # Type string of a siteinfo entry, or nil when the entry has no 'data'.
    def siteinfo_type(info)
      data = info['data']
      data && data['type']
    end

    # Reads the raw siteinfo document from a URL or a local file.
    def read_siteinfo(location)
      return File.read(location) unless REMOTE_URL =~ location.to_s

      open_remote(location) { |io| io.read }
    end

    # Downloads +url+ and parses it into a Nokogiri HTML document.
    def fetch_html(url)
      open_remote(url) { |io| Nokogiri::HTML.parse(io) }
    end

    # Opens +url+ through open-uri and yields the IO, closing it afterwards.
    #
    # Kernel#open is deliberately avoided: given a string such as "|cmd" it
    # spawns a subprocess, given a plain path it reads a local file, and on
    # Ruby 3.0+ it no longer opens URLs at all. Going through URI#open limits
    # fetching to schemes open-uri supports.
    def open_remote(url, &block)
      uri = URI.parse(url)
      unless uri.respond_to?(:open)
        raise ArgumentError, "Unsupported URL for fetching: #{url}"
      end

      uri.open(&block)
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment