Skip to content

Instantly share code, notes, and snippets.

@ncb000gt
Created January 6, 2011 20:49
Show Gist options
  • Save ncb000gt/768556 to your computer and use it in GitHub Desktop.
Save ncb000gt/768556 to your computer and use it in GitHub Desktop.
Make an HTTP request, gunzip the response body with node-compress, and parse it into a DOM with node-htmlparser.
// Fetches "/" from `domain` over HTTP, gunzips the response body with
// node-compress when the server compressed it, and streams the result into
// node-htmlparser. The parsed DOM (or any error) is logged via sys.debug.
var htmlparser = require('htmlparser'),
    sys = require('sys'),
    http = require('http'),
    compress_lib = require('compress'),
    gunzip = new compress_lib.GunzipStream();

var domain = 'www.example.com'; //TODO: CHANGE ME

// NOTE(review): not referenced anywhere in this script; looks like a leftover
// sample input -- confirm before removing.
var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";

// Called by the parser once parsing finishes (or fails).
var handler = new htmlparser.DefaultHandler(function (error, dom) {
  if (error) {
    sys.debug('err: ' + error);
  } else {
    sys.debug('dom: ' + JSON.stringify(dom));
  }
}, { verbose: false, ignoreWhitespace: true });

var parser = new htmlparser.Parser(handler);

// Signals end of input to the parser so the handler callback fires.
function finishParsing() {
  sys.debug('ended');
  parser.done();
}

var proxy = http.createClient(80, domain);

// Report connection-level failures instead of letting them go unhandled.
proxy.on('error', function (error) {
  sys.debug('err: ' + error);
});

// Only gzip is advertised because GunzipStream is the only decoder wired up
// in this script. Declared with `var` so it no longer leaks as an implicit global.
var proxy_req = proxy.request('GET', '/', {
  'User-Agent': 'htmlparser-test/0.0.1',
  'Host': domain,
  'Accept-Encoding': 'gzip'
});

// Decompressed chunks flow straight into the HTML parser.
gunzip.on('data', function (chunk) {
  parser.parseChunk(chunk);
}).on('end', finishParsing).on('error', function (error) {
  sys.debug('err: ' + error);
});

proxy_req.on('response', function (proxy_res) {
  // The server may ignore Accept-Encoding and reply uncompressed; only route
  // the body through gunzip when it says the body is gzip-encoded.
  var encoding = String(proxy_res.headers['content-encoding'] || '').toLowerCase();
  var isGzipped = encoding === 'gzip' || encoding === 'x-gzip';

  proxy_res.on('data', function (chunk) {
    if (isGzipped) {
      gunzip.write(chunk);
    } else {
      parser.parseChunk(chunk);
    }
  }).on('end', function () {
    proxy_res.connection.end();
    if (isGzipped) {
      // Closing the stream leads to gunzip's 'end' handler, which finishes parsing.
      gunzip.close();
    } else {
      finishParsing();
    }
  });
});

proxy_req.end();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment