Skip to content

Instantly share code, notes, and snippets.

@daniello
Created June 12, 2009 21:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save daniello/128932 to your computer and use it in GitHub Desktop.
Save daniello/128932 to your computer and use it in GitHub Desktop.
%% Fetches search results from Scroogle over HTTPS and extracts the result
%% links from the returned HTML with regular expressions.
-module(scroogle_scrapper).
%% NOTE(review): export_all makes every function in this module callable from
%% outside; consider an explicit -export list once the public API is settled.
-compile(export_all).
%% Endpoint that search queries are POSTed to.
-define(SCROOGLE_URL, "https://ssl.scroogle.org/cgi-bin/nbbw.cgi").
%% Placeholder: must be replaced with the filesystem path of the PEM file that
%% is handed to the ssl layer as `cacertfile' when the request is made.
-define(SCROOGLE_PEM, "[PATH_TO_SSL_SCROOGLE_ORG_PEM_CERTIFICATE]").
%% @doc Boot the applications this module relies on: inets first, then ssl.
%%
%% The outcome of starting inets is deliberately not inspected; the value
%% returned to the caller is whatever ssl:start/0 returns.
start() ->
    _InetsOutcome = inets:start(),
    ssl:start().
%% @doc POST the search query `Q' to Scroogle and return the response body.
%%
%% `Q' is passed through url_encoder:encode/1 before being placed in the
%% form body (presumably a string; confirm against url_encoder).
%%
%% Returns:
%%   Body                                - binary body, when the status is 200
%%   {error, {http_status, Code, Phrase}} - any other HTTP status
%%   {error, Reason}                     - the request itself failed
fetch_scroogle_results(Q) ->
    %% We want binary as a result
    Options = [{body_format, binary}],
    %% verify_peer is the current spelling of the legacy integer level 2:
    %% the server certificate is checked against the CA file.
    HTTPOptions = [{ssl, [{cacertfile, ?SCROOGLE_PEM}, {verify, verify_peer}]}],
    ReqBody = "Gw=" ++ url_encoder:encode(Q) ++ "&n=1",
    Request = {?SCROOGLE_URL, [], "application/x-www-form-urlencoded", ReqBody},
    %% httpc replaces the `http' module, which no longer ships with inets.
    case httpc:request(post, Request, HTTPOptions, Options) of
        %% Match on the status code only, so a different HTTP version or
        %% reason phrase on a successful reply is still accepted.
        {ok, {{_Version, 200, _Phrase}, _Headers, Body}} ->
            Body;
        %% Report non-200 replies instead of crashing with case_clause.
        {ok, {{_Version, Status, Phrase}, _Headers, _Body}} ->
            {error, {http_status, Status, Phrase}};
        {error, Error} ->
            {error, Error}
    end.
%% @doc Run the regular expression `RE' over `B' and transform every match.
%%
%% The search is global, case-insensitive, unicode-aware, with `.' matching
%% newlines and multiline anchors. Each match is delivered to `Fun' as a list
%% of binaries: the whole match followed by the captured groups. Returns the
%% list of `Fun' results in match order, or `[]' when nothing matches.
parse(B, RE, Fun) ->
    ReOpts = [global, caseless, unicode, dotall, multiline, {capture, all, binary}],
    case re:run(B, RE, ReOpts) of
        nomatch ->
            [];
        {match, Groups} ->
            [Fun(Group) || Group <- Groups]
    end.
%% @doc Extract every numbered search result from the HTML page `B'.
%%
%% A result is recognised as digits, a dot, whitespace and then an anchor
%% element. Returns one entry per result; each entry is the list that
%% parse_result/1 produces for that result's anchor.
parse_results(B) ->
    RE = "[0-9]+?\\.[[:space:]]+(<a href=.+?</a>)",
    %% parse/3 delivers [WholeMatch, Anchor]. Forward only the captured
    %% anchor: handing the whole list to parse_result/1 would make re:run/3
    %% treat it as iodata (both pieces concatenated), so the anchor would be
    %% seen twice and every link would be reported twice.
    parse(B, RE, fun([_Whole, Anchor]) -> parse_result(Anchor) end).
%% @doc Pull the target and the link text out of the anchor markup in
%% `GResult'.
%%
%% Returns a list with one `{Href, Name}' tuple of binaries per anchor found,
%% or `[]' when `GResult' contains no anchor.
parse_result(GResult) ->
    parse(
        GResult,
        "<a href=\"(.*?)\".*?>(.*?)</a>",
        %% First element is the whole match; only the two groups are kept.
        fun([_Whole, Link, Label]) -> {Link, Label} end
    ).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment