dhagrow/crawler

## crawler
# imports
imp html
imp http
imp process
imp coroutine

# standard library is implicitly accessible in the global namespace
# e.g. set(), queue(), print()

# class definition
cls Crawler:
    # Crawler keeps two queues: `url_q` holds URLs waiting to be fetched and
    # `response_q` holds fetched responses waiting to be parsed. `crawl`
    # drains `url_q` into `response_q`; `process_responses` drains
    # `response_q` and puts every link it finds back onto `url_q`.

    # method definition
    # Creates the crawl state: `url_q` seeded with `start_url`, an empty
    # `response_q`, and an empty `seen` set.
    # NOTE(review): presumably run when `Crawler(...)` is called (main passes
    # a single start URL) - the sketch does not define what '!' means here.
    fn init!(start_url) {
        # the '.' prefix references the current class instance
        # (aka 'self' or 'this')
        .url_q = queue()
        .url_q.put(start_url)

        .response_q = queue()

        # URLs that `crawl` has already handed to a fetch coroutine
        .seen = set()
    }

    # Starts the response parser, then spawns one fetch per URL taken from
    # `url_q` that is not yet in `seen`.
    # NOTE(review): the loop runs for as long as iterating `url_q` yields
    # values; how queue iteration ends is not defined in this sketch.
    fn crawl() {
        # fork a new process to parse responses
        process.fork(.process_responses, .response_q, .url_q)

        for url in .url_q:
            # `process_responses` queues every link it finds, so the same URL
            # can arrive more than once; skip the repeats
            if url in .seen:
                continue
            .seen.add(url)

            # spawn a coroutine to make the HTTP request using an anonymous
            # function
            coroutine.spawn(fn() {
                response = http.get(url)
                .response_q.put(response)
            })
    }

    # mark this as a static function (i.e. not bound to the parent class)
    # Parses the content of each response taken from `response_q` and puts
    # the href of every link in the parsed document onto `url_q`. Both queues
    # are parameters because a static function has no instance to read them
    # from.
    # NOTE(review): hrefs are queued exactly as found; relative links would
    # presumably need resolving before `http.get` can use them - confirm.
    @static
    fn process_responses(response_q, url_q) {
        for response in response_q:
            doc = html.parse(response.content)
            for link in doc.links:
                url_q.put(link.attrs.href)
    }

## main
# imports
imp args
imp crawler

# functions and methods are the only language constructs that require curly
# brace syntax
# the '!' suffix marks a function with special semantics
# Builds an argument parser with a single 'start-url' argument, parses the
# arguments, and crawls from the parsed start URL.
# NOTE(review): presumably the program entry point - '!' is only described as
# "special semantics".
fn main!() {
    # the ':' prefix references the global namespace
    # it is optional, but can be used to assign to a global name or for
    # disambiguation
    parser = :args.Parser()
    parser.add('start-url', help='the URL to begin crawling from')

    # this local is also named `args`, the same name as the imported module;
    # presumably that is why the parser above is reached through `:args`
    args = parser.parse()

    # NOTE(review): the argument is added as 'start-url' but read back as
    # `start_url`; presumably the parser maps '-' to '_' - confirm
    c = crawler.Crawler(args.start_url)
    c.crawl()
}
	# imports
	imp html
	imp http
	imp process
	imp coroutine

	# standard library is implicitly accessible in the global namespace
	# e.g. set(), queue(), print()

	# class definition
	cls Crawler:
		# Crawler keeps two queues: `url_q` holds URLs waiting to be fetched
		# and `response_q` holds fetched responses waiting to be parsed.
		# `crawl` drains `url_q` into `response_q`; `process_responses`
		# drains `response_q` and puts every link it finds back onto `url_q`.
		#
		# Indentation is significant here: only functions and methods use
		# curly braces, so `for` and `if` bodies are marked by nesting alone.

		# method definition
		# Creates the crawl state: `url_q` seeded with `start_url`, an empty
		# `response_q`, and an empty `seen` set.
		fn init!(start_url) {
			# the '.' prefix references the current class instance
			# (aka 'self' or 'this')
			.url_q = queue()
			.url_q.put(start_url)

			.response_q = queue()

			# URLs that `crawl` has already handed to a fetch coroutine
			.seen = set()
		}

		# Starts the response parser, then spawns one fetch per URL taken
		# from `url_q` that is not yet in `seen`.
		# NOTE(review): the loop runs for as long as iterating `url_q` yields
		# values; how queue iteration ends is not defined in this sketch.
		fn crawl() {
			# fork a new process to parse responses
			process.fork(.process_responses, .response_q, .url_q)

			for url in .url_q:
				# `process_responses` queues every link it finds, so the same
				# URL can arrive more than once; skip the repeats
				if url in .seen:
					continue
				.seen.add(url)

				# spawn a coroutine to make the HTTP request using an anonymous
				# function
				coroutine.spawn(fn() {
					response = http.get(url)
					.response_q.put(response)
				})
		}

		# mark this as a static function (i.e. not bound to the parent class)
		# Parses the content of each response taken from `response_q` and
		# puts the href of every link in the parsed document onto `url_q`.
		# Both queues are parameters because a static function has no
		# instance to read them from.
		# NOTE(review): hrefs are queued exactly as found; relative links
		# would presumably need resolving before `http.get` can use them.
		@static
		fn process_responses(response_q, url_q) {
			for response in response_q:
				doc = html.parse(response.content)
				for link in doc.links:
					url_q.put(link.attrs.href)
		}
	# imports
	imp args
	imp crawler

	# functions and methods are the only language constructs that require curly
	# brace syntax
	# the '!' suffix marks a function with special semantics
	# Builds an argument parser with a single 'start-url' argument, parses the
	# arguments, and crawls from the parsed start URL.
	# NOTE(review): presumably the program entry point - '!' is only described
	# as "special semantics".
	fn main!() {
	# the ':' prefix references the global namespace
	# it is optional, but can be used to assign to a global name or for
	# disambiguation
	parser = :args.Parser()
	parser.add('start-url', help='the URL to begin crawling from')

	# this local is also named `args`, the same name as the imported module;
	# presumably that is why the parser above is reached through `:args`
	args = parser.parse()

	# NOTE(review): the argument is added as 'start-url' but read back as
	# `start_url`; presumably the parser maps '-' to '_' - confirm
	c = crawler.Crawler(args.start_url)
	c.crawl()
	}