Skip to content

Instantly share code, notes, and snippets.

@kiru
Created April 12, 2023 08:58
Show Gist options
  • Save kiru/71685d9c5198c62234c63ff90e1d49d6 to your computer and use it in GitHub Desktop.
Save kiru/71685d9c5198c62234c63ff90e1d49d6 to your computer and use it in GitHub Desktop.

og:title and twitter:title

Mix.install([
  # HTTP request helper
  {:finch, "~> 0.14"},
  # HTML Parser
  {:floki, "~> 0.34.0"}
])

Section

# Start the named Finch pool used for every HTTP request in this notebook.
Finch.start_link(name: MyFinch)

# Fetch the hckrnews front page, which lists the current top links.
url = "https://hckrnews.com/"
{:ok, response} = Finch.build(:get, url) |> Finch.request(MyFinch)

{:ok, document} = Floki.parse_document(response.body)

# Collect the href of every `a.link` anchor. Floki.attribute/3 looks the
# attribute up by name, so anchors with extra attributes or a different
# attribute order no longer raise a FunctionClauseError.
links_hn = Floki.attribute(document, "a.link", "href")
defmodule KiruHelper do
  @moduledoc """
  Helpers for downloading a page and reading its `og:title` and
  `twitter:title` meta tags.

  A meta tag is represented as a list of `{key, value}` pairs where the key is
  one of `"name"`, `"content"` or `"value"` (see `extract_meta_tags/1`).
  """

  @type meta_tag :: [{String.t(), String.t()}]

  @doc """
  Downloads `link` through the `MyFinch` pool and returns the meta tags found
  in the page's `<head>`.

  The link is printed to stdout before the request is made. Returns `[]` when
  the request does not succeed or the body cannot be parsed.
  """
  @spec link_to_meta_tag(String.t()) :: [meta_tag()]
  def link_to_meta_tag(link) do
    IO.puts(link)

    request = Finch.build(:get, link)

    case Finch.request(request, MyFinch) do
      {:ok, response} ->
        response.body
        |> Floki.parse_document()
        |> extract_meta_tags()

      _ ->
        []
    end
  end

  @doc """
  Extracts the meta tags that are direct children of the first `<head>`
  element of a parsed document.

  Expects the result of `Floki.parse_document/1`. Each meta tag is reduced to
  the attributes of interest: `property` and `name` are both reported under
  the key `"name"`, `content` and `value` keep their (lower-cased) name, and
  all other attributes are dropped. Anything other than `{:ok, document}`
  yields `[]`.
  """
  @spec extract_meta_tags(term()) :: [meta_tag()]
  def extract_meta_tags({:ok, document}) do
    case Floki.find(document, "head") do
      [{_head, _head_attrs, children} | _] ->
        # Children that are not 3-tuples (text, comments) fail the generator
        # pattern and are skipped. The is_binary/1 check skips nodes whose tag
        # is an atom, which would otherwise crash String.downcase/1.
        for {tag, attributes, _children} <- children,
            is_binary(tag),
            String.downcase(tag) == "meta" do
          normalize_attributes(attributes)
        end

      _ ->
        []
    end
  end

  def extract_meta_tags(_), do: []

  @doc """
  Returns `{og_title, twitter_title}` for a list of meta tags as produced by
  `extract_meta_tags/1`.

  Each element is the `content` of the first tag named `"og:title"` /
  `"twitter:title"` that has a `content` attribute, or `nil` when there is
  none.
  """
  @spec extract_titles([meta_tag()]) :: {String.t() | nil, String.t() | nil}
  def extract_titles(meta_tags) do
    {title_for(meta_tags, "og:title"), title_for(meta_tags, "twitter:title")}
  end

  # Keeps only the attributes we care about, under normalized keys.
  defp normalize_attributes(attributes) do
    Enum.flat_map(attributes, fn {attr_name, attr_value} ->
      case String.downcase(attr_name) do
        key when key in ["property", "name"] -> [{"name", attr_value}]
        key when key in ["content", "value"] -> [{key, attr_value}]
        _ -> []
      end
    end)
  end

  # Returns the content of the first tag named `wanted` that has a content
  # attribute. Tags named `wanted` without content are skipped, not fatal.
  defp title_for(meta_tags, wanted) do
    Enum.find_value(meta_tags, fn attrs ->
      # Match on the "name" key only, so content="og:title" on an unrelated
      # tag is not mistaken for a title tag.
      if {"name", wanted} in attrs, do: content_of(attrs)
    end)
  end

  # Returns the value of the "content" attribute, or nil when absent.
  defp content_of(attrs) do
    case List.keyfind(attrs, "content", 0) do
      {"content", value} -> value
      nil -> nil
    end
  end
end
# Sanity check against a page known to carry Open Graph tags.
og_tester_links = KiruHelper.link_to_meta_tag("https://ogtester.com/")
KiruHelper.extract_titles(og_tester_links)

# Download every linked page and pull out its {og_title, twitter_title} pair.
meta_tags_hn = Enum.map(links_hn, &KiruHelper.link_to_meta_tag/1)
result = Enum.map(meta_tags_hn, &KiruHelper.extract_titles/1)
count = Enum.count(result)

# Classify each pair and count how often each outcome occurs. The nil cases
# are matched first: two missing titles compare equal, so testing equality
# first would report them as :same and leave :both_missing unreachable.
Enum.frequencies_by(result, fn
  {nil, nil} -> :both_missing
  {nil, _twitter_title} -> :og_missing
  {_og_title, nil} -> :twitter_missing
  {same, same} -> :same
  {_og_title, _twitter_title} -> :not_same
end)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment