Skip to content

Instantly share code, notes, and snippets.

@macintux
Created November 28, 2012 03:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save macintux/4158835 to your computer and use it in GitHub Desktop.
Save macintux/4158835 to your computer and use it in GitHub Desktop.
Generate list of HTTP redirects
%%% @author John Daily <jd@epep.us>
%%% @copyright (C) 2012, John Daily
%%% @doc
%%% The httpc module in inets does not appear to offer any way to
%%% determine the "real" URL if it follows a series of redirects
%%% during its request. This code addresses that gap.
%%%
%%% The code does not interpret any HTML-based redirect, such as a
%%% meta refresh.
%%%
%%% It may be useful to wrap httpc:request so that the list of URLs
%%% is included in the standard result set, but that exercise is
%%% left for posterity.
%%% @end
%%% Created : 27 Nov 2012 by John Daily <jd@epep.us>
-module(chaseurls).
-compile(export_all).
-define(MAXREDIRECTS, 10).
%% Will return a tuple, with one of these atoms as first member:
%% * ok
%% * brokenchain (received a 4XX or 5XX status, or a 3XX response without a
%%   Location header, when requesting a URL)
%% * toomany (too many redirects)
%%
%% The 2nd member is a list of URLs in reverse order. The URL
%% provided with the original call will always be the last member, and
%% the last URL encountered will be the first.
%%
%% Examples:
%% chase("http://some-bad-url") -> { brokenchain, [ "http://some-bad-url" ] }
%% chase("http://some-valid-url") -> { ok, [ "http://final-redirect", "http://intermediate-redirect",
%% "http://some-valid-url" ] }
%% chase("http://some-looping-url1") -> { toomany, [ "http://some-looping-url2",
%% "http://some-looping-url1",
%% "http://some-looping-url2",
%% "http://some-looping-url1",
%% "http://some-looping-url2",... ]}
chase(URL) ->
    %% Same walk as chase/2, bounded by the module-wide default limit.
    %% chase/2 takes care of starting inets.
    chase(URL, ?MAXREDIRECTS).
%% Variant of chase/1 in which the caller chooses how many requests may be
%% issued before the walk gives up with `toomany'.
chase(URL, MaxRedirects) ->
    %% The return value is ignored on purpose, so calling this while inets
    %% is already running is harmless.
    inets:start(),
    Start = {url, URL},
    chase(Start, MaxRedirects, []).
%% The accumulator is kept newest-first on purpose: a caller interested only
%% in the final URL can simply take the head of the returned list.
%%
%% The first argument is the outcome of the previous step:
%%   done            - the last response ended the chain
%%   {_, nolocation} - there is no further URL to follow
%%   {url, URL}      - the next URL to request
chase(done, _Remaining, Seen) ->
    {ok, Seen};
chase({_, nolocation}, _Remaining, Seen) ->
    {brokenchain, Seen};
chase({url, Next}, 0, Seen) ->
    %% Budget exhausted: the URL is recorded but not requested.
    {toomany, [Next | Seen]};
chase({url, Next}, Remaining, Seen) ->
    %% autoredirect is switched off so every hop is observed here rather
    %% than being followed inside httpc.
    Response = httpc:request(get, {Next, []}, [{autoredirect, false}], []),
    Outcome = process_result(Response),
    chase(Outcome, Remaining - 1, [Next | Seen]).
%% Translate the return value of httpc:request/4 into the next step of the
%% redirect walk:
%%   done                - 2XX status; the chain ends here
%%   {url, Location}     - 3XX status carrying a "location" header
%%   {url, nolocation}   - 3XX status without a "location" header
%%   {error, nolocation} - 4XX/5XX status, or the request itself failed
%%
%% NOTE(review): the Location value is passed on verbatim; a relative
%% Location is not resolved against the requesting URL here.
process_result({ok, {{_Version, Status, _Reason}, _Headers, _Body}})
  when Status >= 200, Status < 300 ->
    done;
process_result({ok, {{_Version, Status, _Reason}, Headers, _Body}})
  when Status >= 300, Status < 400 ->
    {url, proplists:get_value("location", Headers, nolocation)};
process_result({ok, {{_Version, Status, _Reason}, _Headers, _Body}})
  when Status >= 400 ->
    {error, nolocation};
process_result({error, _Reason}) ->
    %% The request never produced an HTTP response (e.g. the host could not
    %% be reached). Report it like an HTTP error so the caller receives
    %% `brokenchain' instead of a function_clause crash.
    {error, nolocation}.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment