$ sudo apt-get install ruby1.9.1-dev zlib1g-dev nodejs
$ sudo gem install jekyll
$ bundle install
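The `bundle install` step presupposes a Gemfile in the site's root directory; a minimal sketch (gem names are the standard ones, nothing here comes from the snippet itself) looks like:

```ruby
# Gemfile -- minimal sketch; jekyll is the only gem required
# for the setup shown above.
source "https://rubygems.org"
gem "jekyll"
```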
References:
// PhantomJS (http://phantomjs.org/) based web crawler. Anton Ivanov, anton.al.ivanov@gmail.com, 2012.
// UPDATE: this gist has been made into a Node.js module and can now be installed with "npm install js-crawler";
// the Node.js version does not use PhantomJS, but the API available to the client is similar to the present gist.
(function(host) {
  function Crawler() {
    this.visitedURLs = {};
  }
  // ... (excerpt; the full gist defines the crawling logic here) ...
})(this);
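The core loop of such a crawler is extracting links from each fetched page and skipping anything already recorded in `visitedURLs`. A shell sketch of just the link-extraction step (the pattern handles only double-quoted `href` attributes; that simplification is mine, not the gist's):

```shell
# Print href targets found in HTML read from stdin.
# Assumption: links use the form href="..." with double quotes.
extract_links() {
  grep -o 'href="[^"]*"' | sed 's/^href="//; s/"$//'
}
```

Typical usage would be `curl -s http://example.org/ | extract_links`, feeding each result back into the fetch queue unless it was already visited.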
#!/bin/bash
# from https://chromium.woolyss.com/
# and https://gist.github.com/addyosmani/5336747
# and https://chromium.googlesource.com/chromium/src/+/lkgr/headless/README.md
sudo apt-get update
sudo apt-get install software-properties-common
sudo add-apt-repository ppa:canonical-chromium-builds/stage
sudo apt-get update
sudo apt-get install chromium-browser
chromium-browser --headless --no-sandbox http://example.org/
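Beyond merely loading a page, headless Chromium can print the rendered DOM via the standard `--dump-dom` switch. A small wrapper, sketched here with a guard for machines where the PPA's `chromium-browser` binary is not on `PATH`:

```shell
# Print the rendered DOM of a URL, or fail with status 127 when
# the chromium-browser binary is not installed.
dump_dom() {
  url="$1"
  if command -v chromium-browser >/dev/null 2>&1; then
    chromium-browser --headless --no-sandbox --disable-gpu --dump-dom "$url"
  else
    echo "chromium-browser not found" >&2
    return 127
  fi
}
```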
# Script to be placed in elasticsearch/bin
# Launch it from the elasticsearch dir:
# bin/backup indexname
# We assume the data live under elasticsearch/data.
# It will create a backup file under elasticsearch/backup.
if [ -z "$1" ]; then
  INDEX_NAME="dummy"
else
  INDEX_NAME=$1
fi
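The backup step itself is cut off in the excerpt above; a sketch of what it plausibly does, given the paths in the comments (the archive name and tar-based approach are my assumptions, not the gist's):

```shell
# Archive data/<index> into backup/<index>.tar.gz, creating the
# backup directory if needed. Paths follow the comments above;
# the archive name is an assumption.
backup_index() {
  index="$1"
  mkdir -p backup
  tar czf "backup/${index}.tar.gz" "data/${index}"
}
```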
#!/usr/bin/perl
# This is an HTTP proxy built atop the AnyEvent::HTTPD and AnyEvent::HTTP modules.
# I used it to solve a particular problem, but after testing realised it doesn't solve
# it entirely, so I removed the special logic and left an almost plain proxy,
# with Referer forging however :)
#
# Test thoroughly before use!
use strict;
use warnings;