The following gist is an extract of the article Building a simple crawler. It crawls outward from a starting URL, following links for a given number of bounces.
from crawler import Crawler
crawler = Crawler()
crawler.crawl('http://techcrunch.com/')
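The Crawler class itself is not included in this extract. Below is a minimal sketch of how such a crawler could work; the bounce keyword, the LinkParser helper, and the stdlib-only fetching are assumptions for illustration, not the article's actual code.

import urllib.request
from html.parser import HTMLParser

class LinkParser(HTMLParser):
    """Collect absolute http(s) links from anchor tags."""
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for name, value in attrs:
                if name == 'href' and value and value.startswith('http'):
                    self.links.append(value)

class Crawler:
    def crawl(self, url, bounce=1):
        """Fetch url, then follow its links for 'bounce' more levels (assumed API)."""
        print(url)
        if bounce <= 0:
            return
        try:
            html = urllib.request.urlopen(url, timeout=10).read().decode('utf-8', 'ignore')
        except Exception:
            return  # skip pages that fail to download
        parser = LinkParser()
        parser.feed(html)
        for link in parser.links:
            self.crawl(link, bounce - 1)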
from IPython.display import HTML

def hide_code():
    # return an HTML snippet that toggles notebook input cells via jQuery
    return HTML('''<script>
    code_show = true;
    function code_toggle() {
        if (code_show) {
            $("div.input").hide();
        } else {
            $("div.input").show();
        }
        code_show = !code_show;
    }
    </script>
    <a href="javascript:code_toggle()">Toggle code</a>''')
__author__ = 'uolter'

"""
Defines a single function, map_reduce, which takes an input
dictionary i and applies the user-defined function mapper to each
(input_key, input_value) pair, producing a list of intermediate
keys and intermediate values. Repeated intermediate keys then
have their values grouped into a list, and the user-defined
function reducer is applied to the intermediate key and list of
intermediate values, producing the final list of results.
"""
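The extract stops before the function body. A minimal sketch that matches the docstring, assuming mapper returns a list of (intermediate_key, intermediate_value) pairs and reducer returns one result per intermediate key:

def map_reduce(i, mapper, reducer):
    # map phase: collect intermediate (key, value) pairs from every input pair
    intermediate = []
    for input_key, input_value in i.items():
        intermediate.extend(mapper(input_key, input_value))
    # group phase: gather values that share an intermediate key
    groups = {}
    for key, value in intermediate:
        groups.setdefault(key, []).append(value)
    # reduce phase: one reducer call per intermediate key
    return [reducer(key, values) for key, values in groups.items()]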
__author__ = 'uolter'
import math
import map_reduce

def mapper(input_key, input_value):
    # gist truncated here; a plausible body cleans each "<cluster>:<value>" record
    return [cut_and_clean_value(v) for v in input_value]

def cut_and_clean_value(cluster):
    """
    :param cluster: string in the format <cluster>:<value>
    :return: tuple (cluster, value). If value is NaN return 0
    """
    key, value = cluster.split(':')
    number = float(value)
    return key, 0 if math.isnan(number) else number
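A hypothetical end-to-end run, assuming the map_reduce module sketched above and input values that are lists of "<cluster>:<value>" strings:

data = {'doc1': ['a:1.5', 'b:2'], 'doc2': ['a:NaN']}
# sum the cleaned values per cluster; the NaN in doc2 contributes 0
results = map_reduce.map_reduce(data, mapper, lambda k, vs: (k, sum(vs)))
print(results)  # e.g. [('a', 1.5), ('b', 2.0)]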
package main

import (
	"encoding/json"
	"io/ioutil"
	"log"
	"net/http"
)

type test_struct struct {
	// gist truncated here; the struct fields are not in the extract
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import unittest

""" Quicksort implementation """

def quicksort(arr):
    """ Quicksort a list. Gist truncated; completed here with a standard recursive version. """
    if len(arr) <= 1:
        return arr
    pivot, rest = arr[0], arr[1:]
    return (quicksort([x for x in rest if x < pivot]) + [pivot] +
            quicksort([x for x in rest if x >= pivot]))
# -*- coding: utf-8 -*-
import unittest

index = {}

class tree(object):
    """Gist truncated here; the class body is not in the extract."""
# upgrade every locally installed package, skipping editable (-e) installs
pip freeze --local | grep -v '^\-e' | cut -d = -f 1 | xargs pip install -U
curl https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py > get-pip.py;
python get-pip.py;
rm -f get-pip.py;
# change directory here: go to your project home dir
# cd /opt/uuid_resolver/;
pip install virtualenv;
virtualenv venv;
# activate the virtualenv
source venv/bin/activate
# change the requirements.txt location here; the gist is truncated,
# but the likely next step is:
pip install -r requirements.txt
#!/bin/bash
# Elastic Search start and stop script
ES_HOME="/opt/elsearch/elasticsearch"
ES_USER="esearch"
# PID of the Elasticsearch instance running under $ES_HOME, if any
PID=$(ps ax | grep elasticsearch | grep $ES_HOME | grep -v grep | awk '{print $1}')
#echo $PID
# gist truncated here; a typical script would dispatch on $1, e.g.
#   start) su $ES_USER -c "$ES_HOME/bin/elasticsearch -d" ;;
#   stop)  kill $PID ;;