-
-
Save JuanitoFatas/d2f92d9b3c9d9e931389c2e99dce86af to your computer and use it in GitHub Desktop.
Generate Stripe Blog Archive in markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true
require "nokogiri" | |
require "http" | |
# Renders a single blog post as one Markdown list item.
#
# The wrapped object only has to respond to `css(selector)`; in this script it
# is an <article> node taken from the parsed blog index page.
class Article
  # @param article [#css] node the post details are extracted from
  def initialize(article)
    @article = article
  end

  # @return [String] "- [title](url) by author date", with " in category"
  #   inserted before the date when the post has a category
  def to_s
    if category.empty?
      %(- #{title_link} by #{author} #{date})
    else
      %(- #{title_link} by #{author} in #{category} #{date})
    end
  end

  private

  # Only the raw node needs a generated reader; every other value is computed
  # by the explicit methods below.
  attr_reader :article

  # Markdown link to the post; the href found in the header is appended to
  # the site root.
  def title_link
    post = article.css("header h1 a").first
    %([#{post.text}](https://stripe.com#{post.attr("href")}))
  end

  # Linked authors joined with " with "; falls back to the plain-text author
  # span when no author is rendered as a link.
  def author
    authors = article.css("a.author")
    if authors.empty?
      article.css("span.author").text
    else
      authors.map { |a| "[#{a.text}](#{a[:href]})" }.join(" with ")
    end
  end

  # Text of the last <span> in the header, with leading whitespace removed.
  def date
    article.css("header span:last-child").text.lstrip
  end

  # Text of the last link in the header paragraph (empty when there is none).
  # Memoized because `to_s` reads it twice.
  def category
    @_category ||= article.css("header p a:last-child").text
  end
end
# One page of the blog index, rendered as a Markdown section: a heading that
# links to the page followed by one list item per article.
class Page
  # Downloads `page` (following redirects) and parses the response as HTML.
  #
  # @param page [String] URL of the blog index page
  def initialize(page)
    @page = page
    html = HTTP.follow.get(page).to_s
    @document = Nokogiri::HTML.parse(html)
  end

  # @return [String] the heading and the post list separated by a blank line
  def to_s
    "#{heading}\n\n#{posts}"
  end

  # @return [String] level-3 Markdown heading linking to the page
  def heading
    "### [Page #{number}](#{page})"
  end

  # @return [String] one rendered Article per line
  def posts
    lines = articles_by_human.map { |node| Article.new(node).to_s }
    lines.join("\n")
  end

  private

  attr_reader :page, :document

  # The index URL itself has no number in it and counts as page "1"; any
  # other URL is numbered by its last path segment.
  def number
    return "1" if page.end_with?("/blog")

    File.basename(page)
  end

  # Narrows the document step by step, applying each exclusion selector to
  # the result of the previous one.
  def articles_by_human
    selectors = [
      "article:not(.changelog-set)",
      "article:not(.link)",
      "article:not(.globalFooterNav)"
    ]
    selectors.reduce(document) { |nodes, selector| nodes.css(selector) }
  end
end
# Discovers the URLs of every page of the Stripe blog index.
module StripeBlog
  URL = "https://stripe.com/blog"

  # Inclusive range of page numbers probed by `last_page`.
  # NOTE(review): the lower bound presumably reflects how many pages were
  # known to exist when the script was written; confirm before changing it.
  FIRST_PROBED_PAGE = 26
  LAST_PROBED_PAGE = 100

  # @return [Array<String>] the index URL followed by ".../page/N" for every
  #   N from 2 up to `last_page`
  def self.pages
    numbered = (2..last_page).map do |page_number|
      File.join(URL, "page/#{page_number}")
    end
    [URL, *numbered]
  end

  # Finds the highest existing page number by requesting pages in order until
  # one ends up (after redirects) at a URL ending in "/blog"; the page before
  # that one is the last.
  #
  # When every probed page exists, the upper probe bound is returned with a
  # warning. Previously this case leaked the probed Array out of `each`, which
  # made `pages` fail with "bad value for range".
  #
  # @return [Integer]
  def self.last_page
    first_missing = (FIRST_PROBED_PAGE..LAST_PROBED_PAGE).find do |page_number|
      response_uri = HTTP.follow.get("#{URL}/page/#{page_number}").uri.to_s
      response_uri.end_with?("/blog")
    end
    return first_missing - 1 if first_missing

    warn "No redirect found up to page #{LAST_PROBED_PAGE}; the archive may be incomplete."
    LAST_PROBED_PAGE
  end
end
# Render every blog index page to Markdown, reporting progress per page.
markdown = StripeBlog.pages.map do |page|
  puts "Parsing #{page}"
  Page.new(page).to_s
end

output_path = "tmp/stripe-blog.md"
output_dir = File.dirname(output_path)
# Writing fails with ENOENT when the target directory is missing, so create it.
Dir.mkdir(output_dir) unless Dir.exist?(output_dir)
File.write(output_path, markdown.join("\n\n"))
# Report success only once the file has actually been written.
puts "Generated in #{output_path}."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment