This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// save as index.js | |
// npm install https://github.com/GoogleChrome/puppeteer/ | |
// node index.js URL | |
const puppeteer = require('puppeteer'); | |
(async () => { | |
const url = process.argv[2]; | |
const browser = await puppeteer.launch(); | |
// use tor | |
//const browser = await puppeteer.launch({args:['--proxy-server=socks5://127.0.0.1:9050']}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* This Rust code scans through the Common Crawl, looking for text that's | |
* not English. I suspect I may learn much later that it's terrible, | |
* unidiomatic Rust, but it would take me months to learn what good Rust is. | |
* | |
* We depend on some external libraries: | |
* | |
* - html5ever: an HTML parser (we only use its low-level tokenizer) | |
* - encoding: handles text in all the encodings that WHATWG recognizes | |
* - string_cache: interns a bunch of frequently-used strings, like tag names -- necessary to use | |
* the html5ever tokenizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* | |
* Mongo-Hacker | |
* MongoDB Shell Enhancements for Hackers | |
* | |
* Tyler J. Brock - 2013 | |
* | |
* http://tylerbrock.github.com/mongo-hacker | |
* | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export3.dtd"> | |
<en-export export-date="20130730T205637Z" application="Evernote" version="Evernote Mac"> | |
<note> | |
<title>Test Note for Export</title> | |
<content> | |
<![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?> | |
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd"> | |
<en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;"> | |
Hello, World. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest, os, os.path, sys, urllib | |
import tornado.database | |
import tornado.options | |
from tornado.options import options | |
from tornado.testing import AsyncHTTPTestCase | |
# add application root to sys.path | |
APP_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) | |
sys.path.append(os.path.join(APP_ROOT, '..')) |