Skip to content

Instantly share code, notes, and snippets.

@novaugust
Last active December 21, 2022 16:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save novaugust/e13b86e8f39d693b72069ff149f0acef to your computer and use it in GitHub Desktop.
Save novaugust/e13b86e8f39d693b72069ff149f0acef to your computer and use it in GitHub Desktop.
instagram to ghost scripts
Mix.install [:floki, :jason]
defmodule InstaDumpParser do
  @moduledoc """
  Turns an exported posts HTML page into a list of plain maps.

  `parse/1` is the entry point. The remaining public functions each extract
  one piece of information (media path parts, hashtag counts, coordinates,
  tagged people) from already-parsed markup.
  """

  @doc """
  Splits a URL containing `media/posts/YYYYMM/<filename>` into its parts.

  Returns a map with the string values `:year`, `:month` and `:filename`.
  Raises `MatchError` when the URL does not contain that path shape.
  """
  def parse_url(url) do
    # ghost uploads to this, keeping filename:
    # https://blog.novaugust.net/content/images/2022/12/10616813_1695162544038552_1763391856_n_17841611368072760.jpg
    captures = Regex.run(~r|media/posts/(\d\d\d\d)(\d\d)/(.*)|, url, capture: :all_but_first)
    [yyyy, mm, file] = captures
    %{year: yyyy, month: mm, filename: file}
  end

  @doc """
  Counts every hashtag found in the text of the document's `body`.

  Returns a map of tag (without the leading hash sign) to occurrence count.
  """
  def tag_frequencies(document) do
    body_text =
      document
      |> Floki.find("body")
      |> Floki.text(sep: "\n")

    ~r/\#(\S+)/
    |> Regex.scan(body_text, capture: :all_but_first)
    |> List.flatten()
    |> Enum.frequencies()
  end

  @doc """
  Reads latitude and longitude from a coordinates table.

  Expects exactly two `._2piu>div` cells and returns their text as
  `%{lat: lat, long: long}`; any other cell count raises `MatchError`.
  """
  def parse_coords(table) do
    cells = Floki.find(table, "._2piu>div")
    [lat, long] = Enum.map(cells, &Floki.text/1)
    %{lat: lat, long: long}
  end

  @doc """
  Returns the list of names found in a tagged-people table.

  The text of the `._2piu` cells is split on the " (Tagged, 0.00, 0.00)"
  marker that follows each name; blank fragments are dropped.
  """
  def parse_people(table) do
    table
    |> Floki.find("._2piu")
    |> Floki.text()
    |> String.split(~r/ \(Tagged, 0.00, 0.00\),?/)
    |> Enum.map(&String.trim/1)
    |> Enum.reject(&(&1 == ""))
  end

  @doc """
  Reads the HTML file at `path` and returns one map per `.pam` element.

  Each map has the keys `:imgs`, `:coords`, `:ppl`, `:txt` and `:timestamp`.
  Raises if the file cannot be read or the document cannot be parsed.
  """
  def parse(path) do
    {:ok, document} = Floki.parse_document(File.read!(path))

    document
    |> Floki.find(".pam")
    |> Enum.map(&parse_post/1)
  end

  # Builds the result map for a single post element.
  defp parse_post(post) do
    image_sources = post |> Floki.find("img") |> Floki.attribute("src")
    {coords, ppl} = coords_and_people(post)
    caption = post |> Floki.find("._2pim") |> Floki.text()
    posted_at = post |> Floki.find("._3-94") |> Floki.text()

    %{imgs: image_sources, coords: coords, ppl: ppl, txt: caption, timestamp: posted_at}
  end

  # A post carries zero, one or two tables. With a single table, the word
  # "Latitude" in its text decides whether it holds coordinates or people.
  defp coords_and_people(post) do
    case Floki.find(post, "table") do
      [] ->
        {nil, nil}

      [only_table] ->
        if String.contains?(Floki.text(only_table), "Latitude") do
          {parse_coords(only_table), nil}
        else
          {nil, parse_people(only_table)}
        end

      [coordinates, people] ->
        {parse_coords(coordinates), parse_people(people)}
    end
  end
end
# Parse the exported posts page, encode the result as JSON and write it to posts.json.
posts = InstaDumpParser.parse("./content/posts_1.html")
json = Jason.encode!(posts)
File.write!("posts.json", json)
Mix.install [:jason, :mobiledoc]
defmodule MobileDoc.Card.Markdown do
  @moduledoc """
  Card named "markdown" whose HTML output is the raw markdown payload.
  """

  defstruct name: "markdown"

  defmodule Html do
    @moduledoc """
    HTML side of the markdown card.
    """

    @doc """
    Appends the payload's "markdown" value, unchanged, to the end of `buffer`.

    The options and environment arguments are ignored. A payload without a
    "markdown" key raises `FunctionClauseError`.
    """
    def setup(buffer, _options, _env, %{"markdown" => source}),
      do: buffer ++ [source]
  end
end
defmodule MobileDoc.Card.Image do
  @moduledoc """
  Card named "image" that renders its payload as an `<img>` tag, wrapped in a
  `<figure>` with a `<figcaption>` when the payload has a caption.
  """

  defstruct name: "image"

  defmodule Html do
    @moduledoc """
    HTML side of the image card.
    """

    @doc """
    Appends the rendered image markup to the end of `buffer`.

    The payload must contain "src"; "alt" and "caption" are optional. A
    missing "alt" renders as an empty `alt` attribute. When "caption" is
    present the image is wrapped in a complete `<figure>` element, otherwise
    the bare `<img>` tag is emitted. The options and environment arguments are
    ignored.

    Payload values are interpolated as-is, without HTML escaping.
    """
    def setup(buffer, _options, _env, %{"src" => src} = card) do
      img = ~s|<img src="#{src}" alt="#{card["alt"]}">|

      figure =
        if caption = card["caption"] do
          # The closing </figure> used to be missing, leaving the element
          # unbalanced and swallowing whatever markup followed it.
          "<figure>#{img}<figcaption>#{caption}</figcaption></figure>"
        else
          img
        end

      buffer ++ [figure]
    end
  end
end
alias MobileDoc.Renderer_0_3, as: MD

# Load the Ghost export; it is expected to hold exactly one "db" entry.
export = "novaugust.ghost.2022-12-20-12-46-53.json" |> File.read!() |> Jason.decode!()
%{"db" => [%{"data" => data}]} = export

# Take the last post in the export; its "mobiledoc" field is itself a JSON string.
post = List.last(data["posts"])
md = Jason.decode!(post["mobiledoc"])

# NOTE(review): only the markdown card is handed to the renderer here;
# MobileDoc.Card.Image is defined in this file but not registered - confirm intent.
rendered = MD.render(md, %{"markdown" => MobileDoc.Card.Markdown})
IO.puts(rendered)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment