Skip to content

Instantly share code, notes, and snippets.

@thosakwe
Last active September 11, 2018 00:55
Show Gist options
  • Save thosakwe/505e42c010f8ef41f07d712e50bd4292 to your computer and use it in GitHub Desktop.
Save thosakwe/505e42c010f8ef41f07d712e50bd4292 to your computer and use it in GitHub Desktop.
Web Scraping ML???
// Fetch one page in the background while reading a local file, then look up
// the repo named in that file among the links scraped from the page.
//
// `url` is the page to fetch and scan for `a.repo-link` anchors.
fn main(url) {
  // Asynchronously fetch the page and collect every repo link.
  // Do this in the background.
  //
  // `fork` returns a Task, which is awaited below with `join`.
  let myTask = fork {
    // Bind the result as `response`: the following line reads `response`,
    // which was previously never defined (the binding was called `rq`).
    let response = fetch(url, {accept: 'text/html'})
    let dom = response.dom()
    let links = dom.querySelectorAll('a.repo-link')
    return links.map(parseItem)
  }

  // Meanwhile, read a file containing the name of a repo to check.
  let repoName = File.readAsText("repo_name.txt")

  // Wait for the task to complete.
  let items = join myTask

  // The lambda parameter is named `candidate` so it does not shadow the
  // `item` binding being defined.
  let item = items.firstWhere(candidate => candidate.name == repoName)
  print('Found: %{item}')
}
/**
 * Fetch a page, select its `a.repo-link` anchors and run each one through
 * `parseItem`.
 *
 * NOTE(review): `.dom()` is called directly on the value returned by `fetch`,
 * so this presumably targets a synchronous, scraping-oriented `fetch` rather
 * than the standard Promise-based Fetch API — confirm against the intended
 * runtime. The parsed items are computed but neither returned nor used.
 */
function main() {
  const page = fetch('...', {});
  const repoAnchors = page.dom().querySelectorAll("a.repo-link");
  const items = repoAnchors.map(parseItem);
}
(* Record with two string fields, name and href, describing one scraped link. *)
type githubItem = {name: string; href: string}
(* Build a record from element el: name comes from get_text el and href from
   the element's "href" attribute.
   NOTE(review): get_text / get_attribute are defined elsewhere; presumably
   they read the element's text content and a named attribute - confirm. *)
let parse_item el =
{name = get_text el; href = get_attribute "href" el}
(* Entry point: request https://github.com, select every `a.repo-link`
   element from the parsed document, convert each one with parse_item, and
   print the resulting items as JSON. *)
let main =
  (* Ask for HTML rather than JSON: the response is handed to parse_dom and
     queried with a CSS selector below, so a JSON body would not be usable. *)
  let rq = request "https://github.com" {accept = "text/html"} () in
  let dom = parse_dom rq in
  let links = query_selector "a.repo-link" dom in
  let items = each parse_item links in
  print_string (to_json items)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment