Skip to content

Instantly share code, notes, and snippets.

View stav's full-sized avatar
💭
c0d1ng

Steven Almeroth stav

💭
c0d1ng
View GitHub Profile
@stav
stav / gist:3520611
Created August 29, 2012 23:54
Google Places API Search
#!/usr/bin/python
# Google Places Search
#
# Use the Google Places API to text search for the supplied keywords and output
# the first result to standard out.
import sys
import json
import argparse
@stav
stav / gist:4191165
Created December 2, 2012 21:33
Generic PHP debug printer
<?php
/**
* generic debug printer
*
* Because I didn't like having to pass two arguments to a debug printer, namely the
* evaluated and un-evaluated expressions, like: $baker->bread and "baker.bread", i.e.,
* I only wanted to pass the un-evaluated string and let the print routine do the
* evaluating. This script does that procedurally, i.e., not in a function, so expression
* scope is not changed.
*
@stav
stav / gist:4356269
Created December 21, 2012 22:24
Scrapy partial response downloader middleware
class PartialResponse(object):
""" Downloader middleware to only return the first n bytes
"""
def process_response(self, request, response, spider):
max_size = getattr(spider, 'response_max_size', 0)
if max_size and len(response.body) > max_size:
h = response.headers.copy()
h['Content-Length'] = max_size
response = response.replace(
body=response.body.encode('utf-8')[:max_size],
@stav
stav / gist:5137869
Last active December 14, 2015 19:39
Scrapy blocking spider that renders JavaScript with PyQt4
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.spider import BaseSpider
from scrapy.http import HtmlResponse
class Render(QWebPage):
def __init__(self, url):
@stav
stav / gist:5152476
Last active December 14, 2015 21:39
Crawler project running Scrapy from a script
# main.py:
from project.spiders.log_test import TestSpider as EstiloMASpider
from scrapy.xlib.pydispatch import dispatcher
from scrapy.crawler import Crawler
from twisted.internet import reactor
from scrapy.utils.project import get_project_settings
from scrapy import log, signals
@stav
stav / gist:5337954
Last active December 15, 2015 23:09
Scrapinghub API Job Log Sorter
import sys
import json
import argparse
from os.path import exists
from pprint import pprint
from urllib import urlencode, urlretrieve
from urllib2 import urlopen
from urlparse import urlsplit, parse_qs
@stav
stav / ytclipr.py
Last active August 29, 2015 14:02
Python YouTube downloader: see https://github.com/stav/clipy
# Python YouTube downloader
# 1. start script
# 2. copy youtube url into clipboard
# 3. press ctrl-d to start downloading
import sys
import pygtk
pygtk.require('2.0')
import gtk
@stav
stav / rmpyc
Created July 10, 2014 19:34
rm Python bytecode files (.pyc)
#!/bin/bash
if [ -n "$1" ]; then
TARGET="$1"
else
TARGET="."
fi
command="find $TARGET -name '*.pyc' 2>/dev/null"
@stav
stav / RouteSpider.py
Last active August 29, 2015 14:07
Scrapy spider with stream-lined routing and Item Loader processing
"""
Routed Crawler
"""
class Route(dict):
    """Dictionary-based container describing a single spider routing request.

    Behaves exactly like a plain ``dict``; the subclass exists only to give
    routing requests a distinct type the router can recognize.
    """
class Router(scrapy.Spider):
"""Spider routing and loader handling"""
@stav
stav / sitemap_generator_pipeline.py
Last active December 9, 2016 16:05
Scrapy sitemap generator pipeline
""" pipelines.py """
import collections
import scrapy
import scrapy.contrib.exporter
import myproject
class SitemapPipeline(object):
"""
Sitemap builder