-
-
Save cdmwebs/646403 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'httparty' | |
require 'grit' | |
require 'pathname' | |
module Wikipedia | |
class Revision | |
def initialize(article, revision) | |
@article = article | |
@revision = revision | |
end | |
%w(timestamp user revid anon comment).each do |m| | |
define_method(m) do | |
@revision[m] | |
end | |
end | |
def content | |
@revision["*"] | |
end | |
end | |
class Revisions | |
include HTTParty | |
include Enumerable | |
base_uri "http://en.wikipedia.org/w/api.php" | |
default_params :action => :query, | |
:prop => :revisions, | |
:rvdir => :newer, | |
:rvprop => [:content, :timestamp, :user, :revid, :comment].join("|"), | |
:rvlimit => 50, | |
:format => :json | |
def initialize(article) | |
@article = article | |
@pageid = article.pageid | |
@revs = [] | |
end | |
def each | |
nextquery = {} | |
while true | |
result = self.class.get('', :query => {:pageids => @pageid}.merge(nextquery)) | |
@revs = result["query"]["pages"][@pageid]["revisions"].each do |rev| | |
yield Revision.new(@article, rev) | |
end | |
if result["query-continue"] | |
nextquery = result["query-continue"]["revisions"] | |
else | |
return | |
end | |
end | |
sleep 1 | |
end | |
end | |
class Article | |
include HTTParty | |
base_uri "http://en.wikipedia.org/w/api.php" | |
def initialize(name) | |
@name = name | |
end | |
def pageid | |
@pageid ||= begin | |
result = self.class.get('', | |
:query => { | |
:action => :query , | |
:prop => :revisions, | |
:titles => @name, | |
:format => :json }) | |
result["query"]["pages"].keys.first | |
end | |
end | |
def revisions | |
Revisions.new(self) | |
end | |
end | |
end | |
if __FILE__ == $0 | |
article = Wikipedia::Article.new(ARGV[0] || "Main Page") | |
id = article.pageid | |
dir = Pathname.new("cache").join(article.pageid) | |
FileUtils.mkdir_p(dir) | |
repo = Grit::Repo.init_bare(dir.join("#{article.pageid}.git")) | |
last_commit = nil | |
article.revisions.each do |r| | |
puts [r.timestamp, r.user].join(" - ") | |
user = Grit::Actor.new(r.user, r.user) | |
ENV["GIT_AUTHOR_DATE"] = r.timestamp.to_s | |
index = repo.index | |
index.add("article", r.content) | |
last_commit = index.commit(r.comment, last_commit, user) | |
end | |
p last_commit | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment