bubnenkoff/extract_urls_from_html.d

## extract_urls_from_html.d
import std.net.curl, std.stdio;
import std.algorithm, std.regex;

/// Downloads a fixed web page and prints the value of every double-quoted
/// `href` attribute found inside an `<a ...>` tag, one value per line.
void main() {
	// The pattern is bounded on purpose:
	//   `<a\s`    - requires whitespace after the tag name, so tags such as
	//               `<abbr` or `<area` are not mistaken for anchors;
	//   `[^>]*?`  - skips other attributes without leaving the opening tag;
	//   `([^"]*)` - stops at the closing quote. A greedy `(.*)` would run to
	//               the last quote on the line and swallow later attributes
	//               or even later links.
	get("http://www.stroustrup.com/C++.html")
	    .matchAll(`<a\s[^>]*?href="([^"]*)"`)
	    .map!(m => m[1])
	    .each!writeln();
}

## extract_urls_from_html_2.d
---

import arsd.dom;
import std.net.curl;
import std.stdio, std.algorithm;

/// Fetches the page, builds an arsd.dom `Document` from it and prints the
/// `href` of every `a[href]` element as one formatted range.
void main() {
	// get() yields mutable chars; Document is handed a string, hence the cast.
	auto page = cast(string) get("http://www.stroustrup.com/C++.html");
	auto document = new Document(page);

	// Lazily project each matched anchor onto its href value.
	auto hrefs = document.querySelectorAll("a[href]").map!(anchor => anchor.href);
	writeln(hrefs);
}

---

prints:
[snip ... "http://www.morganstanley.com/",
"http://www.cs.columbia.edu/", "http://www.cse.tamu.edu",
"index.html", "C++.html", "bs_faq.html", "bs_faq2.html",
"C++11FAQ.html", "papers.html", "4th.html", "Tour.html",
"programming.html", "dne.html", "bio.html", "interviews.html",
"applications.html", "glossary.html", "compilers.html"]


Or perhaps better yet:

import arsd.dom;
import std.net.curl;
import std.stdio;

/// Fetches the page, builds an arsd.dom `Document` from it and prints the
/// `href` of every `a[href]` element, one per line.
void main() {
	// get() yields mutable chars; Document is handed a string, hence the cast.
	auto page = cast(string) get("http://www.stroustrup.com/C++.html");
	auto document = new Document(page);
	auto anchors = document.querySelectorAll("a[href]");

	foreach (link; anchors) {
		writeln(link.href);
	}
}
	import std.net.curl, std.stdio;
	import std.algorithm, std.regex;

	/// Downloads a fixed web page and prints the value of every double-quoted
	/// `href` attribute found inside an `<a ...>` tag, one value per line.
	void main() {
		// The previous pattern `<a.?href="(.)"` had no repetition at all, so
		// it could only match a one-character href sitting at most one
		// character after `<a`. The bounded pattern below restores it:
		//   `<a\s`    - an anchor tag name followed by whitespace;
		//   `[^>]*?`  - any other attributes, without leaving the tag;
		//   `([^"]*)` - the href value, up to its closing quote.
		get("http://www.stroustrup.com/C++.html")
		    .matchAll(`<a\s[^>]*?href="([^"]*)"`)
		    .map!(m => m[1])
		    .each!writeln();
	}
	---

	import arsd.dom;
	import std.net.curl;
	import std.stdio, std.algorithm;

	/// Downloads the page, parses it into an arsd.dom `Document` and writes
	/// the `href` values of all `a[href]` elements as one formatted range.
	void main() {
		auto html = cast(string) get("http://www.stroustrup.com/C++.html");
		auto document = new Document(html);

		// Select the anchors first, then map each one to its href.
		auto anchors = document.querySelectorAll("a[href]");
		writeln(anchors.map!(node => node.href));
	}

	---

	prints:
	[snip ... "http://www.morganstanley.com/",
	"http://www.cs.columbia.edu/", "http://www.cse.tamu.edu",
	"index.html", "C++.html", "bs_faq.html", "bs_faq2.html",
	"C++11FAQ.html", "papers.html", "4th.html", "Tour.html",
	"programming.html", "dne.html", "bio.html", "interviews.html",
	"applications.html", "glossary.html", "compilers.html"]



	Or perhaps better yet:

	import arsd.dom;
	import std.net.curl;
	import std.stdio;

	/// Downloads the page, parses it into an arsd.dom `Document` and writes
	/// the `href` of each `a[href]` element on its own line.
	void main() {
		auto html = cast(string) get("http://www.stroustrup.com/C++.html");
		auto document = new Document(html);

		foreach (node; document.querySelectorAll("a[href]")) {
			writeln(node.href);
		}
	}