Skip to content

Instantly share code, notes, and snippets.

@nyov
nyov / mozlz4a.py
Last active November 1, 2020 12:15 — forked from Tblue/mozlz4a.py
MozLz4a compression/decompression utility
#!/usr/bin/python3
#
# Decompressor/compressor for files in Mozilla's "mozLz4" format. Firefox uses this file format to
# compress e.g. bookmark backups (*.jsonlz4).
#
# This file format is in fact just plain LZ4 data with a custom header (magic number [8 bytes] and
# uncompressed file size [4 bytes, little endian]).
#
# This Python 3 script requires the LZ4 bindings for Python, see: https://pypi.python.org/pypi/lz4
#
@nyov
nyov / rssspider.py
Last active December 6, 2020 09:49
Scrapy RSSSpider using feedparser
# -*- coding: utf-8 -*-
import logging
import scrapy
import feedparser
class RSSSpider(scrapy.Spider):
name = "rss"
# URLs can be passed on the command line:
@nyov
nyov / adhostsblock.sh
Last active November 1, 2015 14:25
Adblock using /etc/hosts (need no stupid browser plugin)
#!/bin/sh
#
# (c) 2013 "nyov"
set -e
#
# Script to download, update and append a blocklist to /etc/hosts
# for more info, visit http://winhelp2002.mvps.org/hosts.htm
#
@nyov
nyov / voat.py
Created August 19, 2015 19:41
Voat.co Spider
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import logging
from scrapy.utils.log import configure_logging
from scrapy.spiders import Spider
from scrapy.exceptions import CloseSpider
from scrapy.http import Request
@nyov
nyov / gh-repo-contributions-stats.py
Created July 13, 2015 19:52
GitHub repo stats markdown printer, used for a project relicensing callout
#!/usr/bin/python3
# by nyov, Public Domain
from __future__ import unicode_literals, print_function
import six
from six.moves import cPickle as pickle
# https://github.com/sigmavirus24/github3.py
@nyov
nyov / noproxy.py
Last active April 15, 2020 13:01
Test environment proxy settings
#!/usr/bin/env python
from __future__ import print_function
import sys, os
# This test should show how urllib.proxy_bypass_environment()
# doesn't handle proxy environment variables correctly
# on *NIX systems.
# (It does not understand IPs or CIDR notations.)
#
# For a possible solution see the `requests` library:
@nyov
nyov / delay_repeat_spider.py
Created March 18, 2015 00:44
scrapy spider example on using reactor.callLater() for delays and repetition.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# A spider example on using reactor.callLater()
# for delays and repetition.
# scrapy 0.24
import scrapy
from twisted.internet import reactor, defer
@nyov
nyov / example-usage.sql
Last active March 8, 2022 06:50 — forked from myitcv/time_travel_trigger.sql
postgres timetravel using triggers (row-level versioning)
DROP TABLE IF EXISTS fruits;
CREATE TABLE fruits (
id SERIAL NOT NULL,
name TEXT,
valid_from TIMESTAMP NOT NULL,
valid_to TIMESTAMP NOT NULL
);
DROP TRIGGER IF EXISTS fruits_before ON fruits;
@nyov
nyov / rados-binarytest.py
Created September 20, 2014 04:58
python-rados Ioctx.aio_read() not escaping \0
#!/usr/bin/python
import rados
# choose whatever you have, lzma, zlib, snappy...
# lzo seems to nicely represent the bug
compressor = __import__('lzo', fromlist=[''])
def _complete(completion, data_read):
print "oncomplete called:"
@nyov
nyov / simple_login_spider.py
Created September 13, 2014 17:40
basic scrapy login
from scrapy.http import Request
from scrapy.exceptions import CloseSpider
from scrapy.selector import Selector
class MySpider(Spider):
name = ''
allowed_domains = [
]
start_urls = [