cwylie0 cwylie0

## scrape.py
from bs4 import BeautifulSoup
from urllib2 import Request, urlopen
import decimal

def findPrice(url, selector):
	"""Fetch a page and return the price found at a CSS selector.

	The page is requested with a desktop-browser User-Agent header, parsed
	with BeautifulSoup's "lxml" parser, and the first child of the first
	element matching ``selector`` is converted to a Decimal after stripping
	surrounding whitespace and any leading/trailing "$" characters.

	Args:
		url: Address of the page to download.
		selector: CSS selector locating the element that holds the price.

	Returns:
		The price as a ``decimal.Decimal``.

	Raises:
		IndexError: If no element matches ``selector`` (or the matched
			element has no children).
		decimal.InvalidOperation: If the extracted text is not a valid
			decimal number.
	"""
	# Local import keeps this block self-contained; closing() works for
	# response objects that do not implement the context-manager protocol.
	from contextlib import closing

	userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36"
	req = Request(url, None, {'User-Agent': userAgent})
	# Always release the connection, even if read() fails.
	with closing(urlopen(req)) as response:
		html = response.read()
	soup = BeautifulSoup(html, "lxml")
	matches = soup.select(selector)
	if not matches:
		# Same exception type as the original bare [0] lookup, but with a
		# message that says what actually went wrong.
		raise IndexError("no element matches selector %r" % (selector,))
	# NOTE(review): assumes the first child of the element is a text node.
	priceText = matches[0].contents[0].strip().strip("$")
	return decimal.Decimal(priceText)

## spider.sh
#!/bin/bash
#
# Crawl a site with wget in spider mode (nothing is kept on disk) and write
# the sorted, de-duplicated list of URLs it requested to $OUTPUT.
# Edit the settings below before running.

# Named START_URL rather than HOME: HOME is the shell's home-directory
# variable, and overwriting it would be inherited by wget and every other
# command in the pipeline.
START_URL="http://www.yourdomain.com/some/page"
DOMAINS="yourdomain.com"   # domain list passed to wget -D
DEPTH=2                    # recursion depth passed to wget -l
OUTPUT="./urls.csv"        # file that receives one URL per line

# wget's log is captured via 2>&1; lines starting with "--" are kept and
# their third whitespace-separated field is taken as the URL. URLs ending
# in a static-asset extension are dropped before sorting.
wget -r --spider --delete-after --force-html -D "$DOMAINS" -l "$DEPTH" "$START_URL" 2>&1 \
    | grep '^--' | awk '{ print $3 }' | grep -Ev '\.(css|js|png|gif|jpg)$' | sort -u > "$OUTPUT"

## tmux_cheatsheet.markdown

      
        
          
            
              
              1 file
            
          
          
            
              
              1039 forks
            
          
          
            
              
              64 comments
            
          
          
            
              
              4765 stars
            
          
        
        
          
              
          
          
            
                henrik
                / tmux_cheatsheet.markdown
            
            
              Created
              March 3, 2012 19:47
            
              
                tmux cheatsheet
              
          
        
      
        
  
      
    tmux cheatsheet

As configured in my dotfiles.
start new:
tmux

start new with session name:
	from bs4 import BeautifulSoup
	from urllib2 import Request, urlopen
	import decimal

	def findPrice(url, selector):
		userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36"
		req = Request(url, None, {'User-Agent': userAgent})
		html = urlopen(req).read()
		soup = BeautifulSoup(html, "lxml")
		return decimal.Decimal(soup.select(selector)[0].contents[0].strip().strip("$"))
	#!/bin/bash

	HOME="http://www.yourdomain.com/some/page"
	DOMAINS="yourdomain.com"
	DEPTH=2
	OUTPUT="./urls.csv"

	wget -r --spider --delete-after --force-html -D "$DOMAINS" -l $DEPTH "$HOME" 2>&1 \
	| grep '^--' | awk '{ print $3 }' | grep -v '\. \(css\|js\|png\|gif\|jpg\)$' | sort | uniq > $OUTPUT