Skip to content

Instantly share code, notes, and snippets.

@nyov
nyov / replace_response.py
Created May 21, 2014 04:33
(scrapy) use a custom html string as new response body
from scrapy.http import HtmlResponse
def parse(self, response):
    """Rebind the local ``response`` to an HtmlResponse with a cleaned body.

    The body of the incoming response is passed through ``cc_stripped``
    and the result is used as the body of a new ``HtmlResponse`` for the
    same URL.
    """
    # cc_stripped() is just one option: modify the body here, or supply
    # an entirely new one.
    cleaned = cc_stripped(response.body)
    response = HtmlResponse(
        url=response.url,
        body=cleaned.decode('utf-8'),
        encoding='utf-8',
    )
@nyov
nyov / ata-disk-mappings.sh
Created March 6, 2014 19:16
ata controller to disk mappings (e.g. ata0 has sda)
#!/bin/sh
# ata bus to harddisk mappings
# Walk every block device whose name contains "hd" or "sd" and report which
# ata port / scsi_host it hangs off.  A glob is used instead of parsing
# `ls | grep` output, and every expansion is quoted.
for sysdev in /sys/block/*[hs]d*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$sysdev" ] || continue
    device=${sysdev##*/}

    # Four levels above the block device node is the SCSI host directory.
    hostdir="$sysdev/../../../../scsi_host"
    # Devices without a scsi_host directory at that position are skipped
    # instead of producing an error and a half-empty output line.
    [ -d "$hostdir" ] || continue

    for hostpath in "$hostdir"/*; do
        [ -r "$hostpath/unique_id" ] || continue
        host=${hostpath##*/}
        unique_id=$(cat "$hostpath/unique_id")
        echo "ata${unique_id} is scsi_host ${host} with attached device ${device}"
    done
done
@nyov
nyov / requests_from_generator.py
Created February 28, 2014 06:38
scrapy ranged start_requests from generator
class MySpider(Spider):
    """Spider whose start requests are produced lazily by a generator."""
    # [...]

    # start requests from generator
    def start_requests(self):
        """Yield one Request per numbered category page (pages 1 to 246)."""
        url = 'http://some.page.tld/%s/category'
        # range() rather than xrange(): xrange does not exist on Python 3,
        # and a 246-element list on Python 2 is negligible.
        for page in range(1, 247):
            yield Request(url=url % page)
@nyov
nyov / DefaultsItem.py
Created February 12, 2014 23:39
scrapy item with default values
class DefaultsItem(Item):
    """ Item with default values """

    def __getitem__(self, key):
        """Return the stored value for *key*, else the field's 'default'.

        Raises:
            KeyError: if *key* has no stored value and its field metadata
                has no 'default' entry, or if *key* is not in
                ``self.fields`` at all.
        """
        try:
            return self._values[key]
        except KeyError:
            field = self.fields[key]
            if 'default' in field:
                return field['default']
            # No value and no default: re-raise the original KeyError
            # instead of silently falling through and returning None.
            raise
def cc_stripped(x, extended=False):
    """ strip control characters from string

    Keeps only the elements of *x* whose code is printable ASCII and
    drops everything else (control characters, DEL, and anything above
    0x7E).

    Args:
        x: text (``str``) or binary data (``bytes`` / ``bytearray``).
        extended: when true the upper bound is lowered by one, so ``~``
            (0x7E) is dropped as well.

    Returns:
        The filtered data. ``bytes`` / ``bytearray`` input that iterates
        as integers is returned as the same binary type; any other input
        is returned joined with ``"".join``.
    """
    # NOTE(review): the two modes differ only in whether '~' survives,
    # which does not obviously match "also strip extended characters".
    # The original bounds are preserved here; confirm the intent.
    upper = 126 if extended else 127

    # Python 3 bytes (and bytearray everywhere) iterate as ints, on which
    # ord() raises TypeError, so filter the integer values directly.
    if isinstance(x, (bytes, bytearray)) and not isinstance(x, str):
        return type(x)(b for b in x if 32 <= b < upper)

    # A chained comparison avoids building a range object per character.
    return "".join([c for c in x if 32 <= ord(c) < upper])
def parse(self, response):
    """Run the response body through ``cc_stripped``."""
    raw = response.body
    body = cc_stripped(raw)
# Forward all constructor arguments up the MRO, then register
# self.spider_idle as a receiver for the spider_idle signal.
def __init__(self, *args, **kwargs):
# NOTE(review): super(Spider, self) starts the lookup *after* Spider in the
# MRO. If the enclosing class is a subclass of Spider (its header is not
# visible here), Spider.__init__ itself is skipped - confirm this is intended.
super(Spider, self).__init__(*args, **kwargs)
# Connect the idle handler so spider_idle() is called when the signal fires.
dispatcher.connect(self.spider_idle, signal=signals.spider_idle)
# Flag checked by spider_idle(): once truthy, the idle handler returns
# without scheduling anything. Nothing in the visible code sets it to True;
# presumably the logout flow does - TODO confirm.
logout_done = False
def spider_idle(self, spider):
    """Idle handler: schedule the logout and keep the spider open.

    Returns immediately when the signal is for a different spider or when
    ``self.logout_done`` is already truthy. Otherwise the value returned
    by ``self.logout()`` is handed to the engine's scheduler and
    ``DontCloseSpider`` is raised.
    """
    if spider != self or self.logout_done:
        return

    schedule = self.crawler.engine.schedule
    schedule(self.logout(), spider)
    raise DontCloseSpider('Session logout proceeding')
@nyov
nyov / adbapi.py
Last active August 17, 2017 05:53 — forked from powdahound/adbapi.py
from twisted.enterprise import adbapi
from twisted.python import log
import MySQLdb
class ReconnectingConnectionPool(adbapi.ConnectionPool):
"""Reconnecting adbapi connection pool for MySQL.
This class improves on the solution posted at
http://www.gelens.org/2008/09/12/reinitializing-twisted-connectionpool/
by checking exceptions by error code and only disconnecting the current
@nyov
nyov / pysmjs.py
Created July 19, 2013 15:31
python spidermonkey javascript evaluation example
import os
import subprocess
def spidermonk(script, user):
""" JavaScript wrapper
smjs notes: not recommended for production use,
as it contains dangerous (security-wise) debugging features.
-- Make sure not to load unsafe javascripts!
"""
@nyov
nyov / magento-cli.py
Last active August 15, 2021 12:57
A Magento REST API example with rauth as OAuth provider. For Magento 1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from rauth.service import OAuth1Service
# Create consumer key & secret in your Magento Admin interface
# For an API Guideline see:
# http://www.magentocommerce.com/api/rest/authentication/oauth_authentication.html
#
# Short Magento setup explanation:
@nyov
nyov / genericOnLoadEvent.js
Created February 1, 2011 18:12
Reasonably portable, non-framework way of having your script set a function to run at load time
/**
* @source http://stackoverflow.com/questions/807878/javascript-that-executes-after-page-load#807997
*/
var yourFunctionName = function(){};
if(window.attachEvent) {
window.attachEvent('onload', yourFunctionName);
} else {
if(window.onload) {
var curronload = window.onload;