-
-
Save ga2arch/e8904177f722c6560e37 to your computer and use it in GitHub Desktop.
Prolog Uri Parser v2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% ====================================== | |
% Operators | |
% ====================================== | |
'<--'(X, F, A, Z) :- | |
call(F, A, (P, R)), | |
X = P, | |
Z = (P, R). | |
:- op(600, xfx, '<--'). | |
>>(P1, P2, Text, Z) :- | |
!, | |
>>(P1, P2, [], Text, Z). | |
>>(P1, P2 >> P3, Acc, Text, Z) :- | |
!, | |
append(Acc, [P1], W), | |
>>(P2, P3, W, Text, Z). | |
>>(P1, P2, Acc, Text, Z) :- | |
append(Acc, [P1, P2], W), | |
do(W, Text, Z). | |
:- op(400, xfy, '>>'). | |
'$'(F, X, A, Z) :- | |
call(F, X, A, Z). | |
:- op(500, xfx, '$'). | |
% ====================================== | |
% Primitive Parsers | |
% ====================================== | |
letter([C|Text], Z) :- | |
Z = (C, Text). | |
letter_not(Not, [C|Text], Z) :- | |
string_to_list(Not, Symbols), | |
not(member(C, Symbols)), | |
Z = (C, Text). | |
char(Ch, [C|Text], Z) :- | |
not(is_number(C)), | |
char_code(Ch, C), | |
Z = (C, Text). | |
digit([C|Text], Z) :- | |
is_number(C), | |
number_chars(X, [C]), | |
X >= 0, | |
X =< 9, | |
Z = (C, Text). | |
% ====================================== | |
% Parsers Combinators | |
% ====================================== | |
many(Parser, Text, Z) :- | |
seq(Text, [Parser], Z). | |
many1(Parser, Text, Z) :- | |
many(Parser, Text, (P, R)), | |
not(empty(P)), | |
Z = (P, R). | |
seq(Text, Parsers, Z) :- | |
seq(Text, Parsers, 0, [], Z). | |
seq([], _, _, Parsed, (Parsed, [])) :- !. | |
seq(Text, Parsers, I, Parsed, Z) :- | |
( | |
nth0(I, Parsers, Parser) -> | |
( | |
call(Parser, Text, (P, R)) -> | |
append(Parsed, [P], W), | |
seq(R, Parsers, 0, W, Z) | |
; I1 is I+1, | |
seq(Text, Parsers, I1, Parsed, Z) | |
) | |
; Z = (Parsed, Text) | |
), !. | |
do(Parsers, Text, Z) :- | |
do(Parsers, Text, [], Z). | |
do([], Text, Parsed, Z) :- | |
Z = (Parsed, Text), !. | |
do([e(Parser)|Ps], Text, Parsed, Z) :- | |
!, | |
call(Parser, Text, (_, R)), | |
do(Ps, R, Parsed, Z). | |
do([Parser|Ps], Text, Parsed, Z) :- | |
call(Parser, Text, (P, R)), | |
append(Parsed, [P], W), | |
do(Ps, R, W, Z). | |
do1(Parsers, Text, Z) :- | |
do(Parsers, Text, (P, R)), | |
not(empty(P)), | |
Z = (P, R). | |
% ====================================== | |
% Helpers | |
% ====================================== | |
empty([]). | |
parse(F, X, Z) :- | |
call(F, X, Z). | |
is_number(C) :- | |
catch(number_chars(_, [C]), _, fail). | |
try(Parser, Text, Z) :- | |
( | |
call(Parser, Text, Z), ! | |
; Z = ([], Text) | |
). | |
st([], []). | |
st(L, Z) :- | |
( | |
is_list(L) -> | |
flatten(L, FS), | |
string_to_atom(FS, Z) | |
; string_to_atom(L, Z) | |
). | |
an(T, Z) :- | |
( | |
atom_number(T, Z) | |
; Z = [] | |
). | |
prettify((S, (U, H, P), Pa, Q, F), Z) :- | |
st(S, Scheme), | |
st(U, UserInfo), | |
st(H, Host), | |
st(P, TP), | |
an(TP, Port), | |
st(Pa, Path), | |
st(Q, Query), | |
st(F, Fragment), | |
Z = uri(Scheme, UserInfo, Host, | |
Port, Path, Query, Fragment), !. | |
prettify((S, U, H), Z) :- | |
prettify((S, (U, H, []), [], [], []), Z). | |
% ====================================== | |
% Parsers | |
% ====================================== | |
id(Text, Z) :- | |
many1(letter_not("/?#@:"), Text, Z). | |
scheme(Text, Z) :- | |
id(Text, Z). | |
authority(Text, Z) :- | |
do( | |
[ | |
char('/'), | |
char('/'), | |
try $ do( | |
[ | |
U <-- userinfo, | |
char('@') | |
]), | |
H <-- host, | |
try $ do( | |
[ | |
char(':'), | |
P <-- port | |
]) | |
], Text, (_, R)), | |
Z = ((U, H, P), R), !. | |
userinfo(Text, Z) :- | |
id(Text, Z). | |
host_id(Text, Z) :- | |
many1(letter_not("./?#@:"), Text, Z). | |
digit3(Text, Z) :- | |
do([digit, digit, digit], Text, Z). | |
%do([digit, digit], Text, Z); | |
%do([digit], Text, Z). | |
ip(Text, Z) :- | |
do1([digit3, char('.'), | |
digit3, char('.'), | |
digit3, char('.'), | |
digit3], Text, (P, R)), | |
\+ [46|_] = R, | |
Z = (P, R). | |
host(Text, Z) :- | |
ip(Text, Z), !; | |
parse( | |
host_id | |
>> (many $ char('.') | |
>> host_id), Text, Z), !; | |
host_id(Text, Z), !. | |
port(Text, Z) :- | |
many1(digit, Text, Z). | |
path(Text, Z) :- | |
parse( | |
id | |
>> (many $ char('/') | |
>> id), Text, Z). | |
query(Text, Z) :- | |
parse( | |
e(char('?')) | |
>> (many1 $ letter_not("#")), | |
Text, Z). | |
fragment(Text, Z) :- | |
parse( | |
e(char('#')) | |
>> (many1 $ letter), | |
Text, Z). | |
% ====================================== | |
% Main | |
% ====================================== | |
parsed_uri(Text, Z) :- | |
scheme(Text, ("mailto", Rs)), !, | |
do( | |
[ | |
char(':'), | |
U <-- userinfo, | |
try $ do( | |
[ | |
char('@'), | |
H <-- host | |
]) | |
], Rs, (_, [])), | |
prettify(("mailto", U, H), Z), !. | |
parsed_uri(Text, Z) :- | |
scheme(Text, ("news", Rs)), !, | |
do( | |
[ | |
char(':'), | |
H <-- host | |
], Rs, (_, [])), | |
prettify(("news", [], H), Z), !. | |
parsed_uri(Text, Z) :- | |
scheme(Text, (S, Rs)), | |
(S = "tel"; S = "fax"), !, | |
do( | |
[ | |
char(':'), | |
U <-- userinfo | |
], Rs, (_, [])), | |
prettify((S, U, []), Z), !. | |
parsed_uri(Text, Z) :- | |
do( | |
[ | |
S <-- scheme, | |
char(':'), | |
A <-- authority, | |
try $ do( | |
[ | |
char('/'), | |
Pa <-- try $ path, | |
Q <-- try $ query, | |
F <-- try $ fragment | |
]) | |
], Text, (_, [])), | |
prettify((S, A, Pa, Q, F), Z), !. | |
parsed_uri(Text, Z) :- | |
do( | |
[ | |
S <-- scheme, | |
char(':'), | |
try $ char('/'), | |
Pa <-- try $ path, | |
Q <-- try $ query, | |
F <-- try $ fragment | |
], Text, (_, [])), | |
prettify((S, ([], [], []), Pa, Q, F), Z), !. | |
:- begin_tests(t). | |
test(parsed_uri) :- | |
parsed_uri("http://disco.unimib.com", _), | |
parsed_uri("http://g@disco.unimib.com", _), | |
parsed_uri("http://g@disco.unimib.com:30", _), | |
parsed_uri("http://g@disco.unimib.com:30/", _), | |
parsed_uri("http://g@disco.unimib.com:30/ciao", _), | |
parsed_uri("http://g@disco.unimib.com:30/ciao/bella", _), | |
parsed_uri("http://g@disco.unimib.com:30/ciao/bella?lol", _), | |
parsed_uri("http://g@disco.unimib.com:30/ciao/bella?lol#c", _), | |
parsed_uri("http://192.168.001.002", _), | |
parsed_uri("http://g@192.168.001.002", _), | |
parsed_uri("http://g@192.168.001.002:30", _), | |
parsed_uri("http://g@192.168.001.002:30/", _), | |
parsed_uri("http://g@192.168.001.002:30/ciao", _), | |
parsed_uri("http://g@192.168.001.002:30/ciao/bella", _), | |
parsed_uri("http://g@192.168.001.002:30/ciao/bella?lol", _), | |
parsed_uri("http://g@192.168.001.002:30/ciao/bella?lol#c", _). | |
test(parsed_uri_neg) :- | |
\+ parsed_uri("http://disco.unimib.com:",_), | |
\+ parsed_uri("http://disco.unimib.com:/ciao",_), | |
\+ parsed_uri("http://disco.unimib.com/ciao/",_), | |
\+ parsed_uri("http://disco.unimib.com//",_), | |
\+ parsed_uri("http://disco.unimib.com#",_), | |
\+ parsed_uri("http://disco.unimib.com#ciao",_), | |
\+ parsed_uri("http://disco.unimib.com/#",_), | |
\+ parsed_uri("http://disco.unimib.com?lol",_), | |
:- end_tests(t). |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment