Extracting the selector code from Scrapy into a standalone package
From 2b781158628b586bf085888dbd8d32334fa4bf6a Mon Sep 17 00:00:00 2001
From: Umair Ashraf <umr.ashrf@gmail.com>
Date: Sat, 21 Feb 2015 20:04:07 +0500
Subject: [PATCH 1/5] added basic selectors files -- gitignore and readme
---
.gitignore | 11 +++++++++++
README.md | 3 +++
2 files changed, 14 insertions(+)
create mode 100644 .gitignore
create mode 100644 README.md
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..837a67b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+*.pyc
+_trial_temp*
+dropin.cache
+docs/build
+*egg-info
+.tox
+venv
+build
+dist
+.idea
+.html
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6196415
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# Selectors
+
+Selectors provide high level API for XML and HTML parsing using XPath and CSS selectors in Python.
--
1.9.1
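The README's one-line summary is the whole point of this series, so here is a minimal usage sketch of the extracted package. It is hedged: it assumes the package is importable as `selectors` (the name used throughout these patches) and uses only the `Selector`, `.xpath()`, `.css()`, and `.extract()` names that appear in the patches below — an illustration, not shipped documentation.

```python
# Minimal sketch, assuming the package from these patches is installed
# and importable as `selectors` (Python 2, per the rest of the series).
from selectors import Selector

html = '<html><body><p class="msg">Hello</p></body></html>'
sel = Selector(text=html)                  # standalone text, no Scrapy Response needed
print(sel.xpath('//p/text()').extract())   # [u'Hello']
print(sel.css('p.msg::text').extract())    # [u'Hello']
```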
From eb60eb2ceaac01d23ec8167f4f3d797ed6c03014 Mon Sep 17 00:00:00 2001
From: Umair Ashraf <umr.ashrf@gmail.com>
Date: Sat, 21 Feb 2015 20:13:35 +0500
Subject: [PATCH 2/5] made selectors independent of scrapy
---
selectors/__init__.py | 10 +-
selectors/common.py | 22 ++++
selectors/csstranslator.py | 16 +--
selectors/exceptions.py | 5 +
selectors/lxmldocument.py | 31 ------
selectors/lxmlsel.py | 50 ---------
selectors/unified.py | 63 +++--------
selectors/utils/decorator.py | 23 +---
selectors/utils/misc.py | 86 +--------------
selectors/utils/python.py | 249 -------------------------------------------
10 files changed, 58 insertions(+), 497 deletions(-)
create mode 100644 selectors/common.py
create mode 100644 selectors/exceptions.py
delete mode 100644 selectors/lxmldocument.py
delete mode 100644 selectors/lxmlsel.py
diff --git a/selectors/__init__.py b/selectors/__init__.py
index bfbde4d..97eb9d5 100644
--- a/selectors/__init__.py
+++ b/selectors/__init__.py
@@ -1,5 +1,5 @@
-"""
-Selectors
-"""
-from scrapy.selector.unified import *
-from scrapy.selector.lxmlsel import *
+
+__version__ = '0.0.1'
+
+
+from selectors.unified import *
diff --git a/selectors/common.py b/selectors/common.py
new file mode 100644
index 0000000..4cbf1ec
--- /dev/null
+++ b/selectors/common.py
@@ -0,0 +1,22 @@
+"""
+We need these things in Scrapy and Selectors packages both
+"""
+from lxml import etree
+
+from .csstranslator import SelectorHTMLTranslator, SelectorGenericTranslator
+
+
+class SafeXMLParser(etree.XMLParser):
+ def __init__(self, *args, **kwargs):
+ kwargs.setdefault('resolve_entities', False)
+ super(SafeXMLParser, self).__init__(*args, **kwargs)
+
+
+_ctgroup = {
+ 'html': {'_parser': etree.HTMLParser,
+ '_csstranslator': SelectorHTMLTranslator(),
+ '_tostring_method': 'html'},
+ 'xml': {'_parser': SafeXMLParser,
+ '_csstranslator': SelectorGenericTranslator(),
+ '_tostring_method': 'xml'},
+}
diff --git a/selectors/csstranslator.py b/selectors/csstranslator.py
index 7482837..2148a10 100644
--- a/selectors/csstranslator.py
+++ b/selectors/csstranslator.py
@@ -3,7 +3,7 @@ from cssselect.xpath import _unicode_safe_getattr, XPathExpr, ExpressionError
from cssselect.parser import FunctionalPseudoElement
-class ScrapyXPathExpr(XPathExpr):
+class SelectorXPathExpr(XPathExpr):
textnode = False
attribute = None
@@ -16,7 +16,7 @@ class ScrapyXPathExpr(XPathExpr):
return x
def __str__(self):
- path = super(ScrapyXPathExpr, self).__str__()
+ path = super(SelectorXPathExpr, self).__str__()
if self.textnode:
if path == '*':
path = 'text()'
@@ -33,7 +33,7 @@ class ScrapyXPathExpr(XPathExpr):
return path
def join(self, combiner, other):
- super(ScrapyXPathExpr, self).join(combiner, other)
+ super(SelectorXPathExpr, self).join(combiner, other)
self.textnode = other.textnode
self.attribute = other.attribute
return self
@@ -43,7 +43,7 @@ class TranslatorMixin(object):
def xpath_element(self, selector):
xpath = super(TranslatorMixin, self).xpath_element(selector)
- return ScrapyXPathExpr.from_xpath(xpath)
+ return SelectorXPathExpr.from_xpath(xpath)
def xpath_pseudo_element(self, xpath, pseudo_element):
if isinstance(pseudo_element, FunctionalPseudoElement):
@@ -71,18 +71,18 @@ class TranslatorMixin(object):
raise ExpressionError(
"Expected a single string or ident for ::attr(), got %r"
% function.arguments)
- return ScrapyXPathExpr.from_xpath(xpath,
+ return SelectorXPathExpr.from_xpath(xpath,
attribute=function.arguments[0].value)
def xpath_text_simple_pseudo_element(self, xpath):
"""Support selecting text nodes using ::text pseudo-element"""
- return ScrapyXPathExpr.from_xpath(xpath, textnode=True)
+ return SelectorXPathExpr.from_xpath(xpath, textnode=True)
-class ScrapyGenericTranslator(TranslatorMixin, GenericTranslator):
+class SelectorGenericTranslator(TranslatorMixin, GenericTranslator):
pass
-class ScrapyHTMLTranslator(TranslatorMixin, HTMLTranslator):
+class SelectorHTMLTranslator(TranslatorMixin, HTMLTranslator):
pass
diff --git a/selectors/exceptions.py b/selectors/exceptions.py
new file mode 100644
index 0000000..9ed8b6b
--- /dev/null
+++ b/selectors/exceptions.py
@@ -0,0 +1,5 @@
+class SelectorsDeprecationWarning(Warning):
+ """Warning category for deprecated features, since the default
+ DeprecationWarning is silenced on Python 2.7+
+ """
+ pass
diff --git a/selectors/lxmldocument.py b/selectors/lxmldocument.py
deleted file mode 100644
index 817349b..0000000
--- a/selectors/lxmldocument.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-This module contains a simple class (LxmlDocument) which provides cache and
-garbage collection to lxml element tree documents.
-"""
-
-import weakref
-from lxml import etree
-from scrapy.utils.trackref import object_ref
-
-
-def _factory(response, parser_cls):
- url = response.url
- body = response.body_as_unicode().strip().encode('utf8') or '<html/>'
- parser = parser_cls(recover=True, encoding='utf8')
- return etree.fromstring(body, parser=parser, base_url=url)
-
-
-class LxmlDocument(object_ref):
-
- cache = weakref.WeakKeyDictionary()
- __slots__ = ['__weakref__']
-
- def __new__(cls, response, parser=etree.HTMLParser):
- cache = cls.cache.setdefault(response, {})
- if parser not in cache:
- obj = object_ref.__new__(cls)
- cache[parser] = _factory(response, parser)
- return cache[parser]
-
- def __str__(self):
- return "<LxmlDocument %s>" % self.root.tag
diff --git a/selectors/lxmlsel.py b/selectors/lxmlsel.py
deleted file mode 100644
index 070cb23..0000000
--- a/selectors/lxmlsel.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""
-XPath selectors based on lxml
-"""
-from scrapy.utils.deprecate import create_deprecated_class
-from .unified import Selector, SelectorList
-
-
-__all__ = ['HtmlXPathSelector', 'XmlXPathSelector', 'XPathSelector',
- 'XPathSelectorList']
-
-def _xpathselector_css(self, *a, **kw):
- raise RuntimeError('.css() method not available for %s, '
- 'instantiate scrapy.Selector '
- 'instead' % type(self).__name__)
-
-XPathSelector = create_deprecated_class(
- 'XPathSelector',
- Selector,
- {
- '__slots__': (),
- '_default_type': 'html',
- 'css': _xpathselector_css,
- },
- new_class_path='scrapy.Selector',
- old_class_path='scrapy.selector.XPathSelector',
-)
-
-XmlXPathSelector = create_deprecated_class(
- 'XmlXPathSelector',
- XPathSelector,
- clsdict={
- '__slots__': (),
- '_default_type': 'xml',
- },
- new_class_path='scrapy.Selector',
- old_class_path='scrapy.selector.XmlXPathSelector',
-)
-
-HtmlXPathSelector = create_deprecated_class(
- 'HtmlXPathSelector',
- XPathSelector,
- clsdict={
- '__slots__': (),
- '_default_type': 'html',
- },
- new_class_path='scrapy.Selector',
- old_class_path='scrapy.selector.HtmlXPathSelector',
-)
-
-XPathSelectorList = create_deprecated_class('XPathSelectorList', SelectorList)
diff --git a/selectors/unified.py b/selectors/unified.py
index b8a3678..77b363a 100644
--- a/selectors/unified.py
+++ b/selectors/unified.py
@@ -1,57 +1,24 @@
"""
XPath selectors based on lxml
"""
+import re
from lxml import etree
-from scrapy.utils.misc import extract_regex
-from scrapy.utils.trackref import object_ref
-from scrapy.utils.python import unicode_to_str, flatten
-from scrapy.utils.decorator import deprecated
-from scrapy.http import HtmlResponse, XmlResponse
-from .lxmldocument import LxmlDocument
-from .csstranslator import ScrapyHTMLTranslator, ScrapyGenericTranslator
+from .utils.misc import extract_regex
+from .utils.python import flatten
+from .utils.decorator import deprecated
+from .common import _ctgroup
__all__ = ['Selector', 'SelectorList']
-class SafeXMLParser(etree.XMLParser):
- def __init__(self, *args, **kwargs):
- kwargs.setdefault('resolve_entities', False)
- super(SafeXMLParser, self).__init__(*args, **kwargs)
+class Selector(object):
-_ctgroup = {
- 'html': {'_parser': etree.HTMLParser,
- '_csstranslator': ScrapyHTMLTranslator(),
- '_tostring_method': 'html'},
- 'xml': {'_parser': SafeXMLParser,
- '_csstranslator': ScrapyGenericTranslator(),
- '_tostring_method': 'xml'},
-}
+ __slots__ = ['text', 'namespaces', 'type', '_expr', '_root',
+ '_parser', '_csstranslator', '_tostring_method']
-
-def _st(response, st):
- if st is None:
- return 'xml' if isinstance(response, XmlResponse) else 'html'
- elif st in ('xml', 'html'):
- return st
- else:
- raise ValueError('Invalid type: %s' % st)
-
-
-def _response_from_text(text, st):
- rt = XmlResponse if st == 'xml' else HtmlResponse
- return rt(url='about:blank', encoding='utf-8',
- body=unicode_to_str(text, 'utf-8'))
-
-
-class Selector(object_ref):
-
- __slots__ = ['response', 'text', 'namespaces', 'type', '_expr', '_root',
- '__weakref__', '_parser', '_csstranslator', '_tostring_method']
-
- _default_type = None
_default_namespaces = {
"re": "http://exslt.org/regular-expressions",
@@ -65,23 +32,23 @@ class Selector(object_ref):
}
_lxml_smart_strings = False
- def __init__(self, response=None, text=None, type=None, namespaces=None,
+ def __init__(self, text=None, url=None, type='html', namespaces=None,
_root=None, _expr=None):
- self.type = st = _st(response, type or self._default_type)
+ self.type = st = type
self._parser = _ctgroup[st]['_parser']
self._csstranslator = _ctgroup[st]['_csstranslator']
self._tostring_method = _ctgroup[st]['_tostring_method']
+ self.text = text
if text is not None:
- response = _response_from_text(text, st)
+ body = text.strip().encode('utf8') or '<html/>'
+ parser_obj = self._parser(recover=True, encoding='utf8')
+ _root = etree.fromstring(body, base_url=url, parser=parser_obj)
- if response is not None:
- _root = LxmlDocument(response, self._parser)
-
- self.response = response
self.namespaces = dict(self._default_namespaces)
if namespaces is not None:
self.namespaces.update(namespaces)
+
self._root = _root
self._expr = _expr
diff --git a/selectors/utils/decorator.py b/selectors/utils/decorator.py
index 38bee1a..2177a9a 100644
--- a/selectors/utils/decorator.py
+++ b/selectors/utils/decorator.py
@@ -1,9 +1,7 @@
import warnings
from functools import wraps
-from twisted.internet import defer, threads
-
-from scrapy.exceptions import ScrapyDeprecationWarning
+from selectors.exceptions import SelectorsDeprecationWarning
def deprecated(use_instead=None):
@@ -17,7 +15,7 @@ def deprecated(use_instead=None):
message = "Call to deprecated function %s." % func.__name__
if use_instead:
message += " Use %s instead." % use_instead
- warnings.warn(message, category=ScrapyDeprecationWarning, stacklevel=2)
+ warnings.warn(message, category=SelectorsDeprecationWarning, stacklevel=2)
return func(*args, **kwargs)
return wrapped
@@ -25,20 +23,3 @@ def deprecated(use_instead=None):
deco = deco(use_instead)
use_instead = None
return deco
-
-
-def defers(func):
- """Decorator to make sure a function always returns a deferred"""
- @wraps(func)
- def wrapped(*a, **kw):
- return defer.maybeDeferred(func, *a, **kw)
- return wrapped
-
-def inthread(func):
- """Decorator to call a function in a thread and return a deferred with the
- result
- """
- @wraps(func)
- def wrapped(*a, **kw):
- return threads.deferToThread(func, *a, **kw)
- return wrapped
diff --git a/selectors/utils/misc.py b/selectors/utils/misc.py
index 3152db6..969e78e 100644
--- a/selectors/utils/misc.py
+++ b/selectors/utils/misc.py
@@ -1,76 +1,9 @@
"""Helper functions which doesn't fit anywhere else"""
import re
-import hashlib
-from importlib import import_module
-from pkgutil import iter_modules
-import six
from w3lib.html import replace_entities
-from scrapy.utils.python import flatten
-from scrapy.item import BaseItem
-
-
-_ITERABLE_SINGLE_VALUES = dict, BaseItem, six.text_type, bytes
-
-
-def arg_to_iter(arg):
- """Convert an argument to an iterable. The argument can be a None, single
- value, or an iterable.
-
- Exception: if arg is a dict, [arg] will be returned
- """
- if arg is None:
- return []
- elif not isinstance(arg, _ITERABLE_SINGLE_VALUES) and hasattr(arg, '__iter__'):
- return arg
- else:
- return [arg]
-
-
-def load_object(path):
- """Load an object given its absolute object path, and return it.
-
- object can be a class, function, variable o instance.
- path ie: 'scrapy.contrib.downloadermiddelware.redirect.RedirectMiddleware'
- """
-
- try:
- dot = path.rindex('.')
- except ValueError:
- raise ValueError("Error loading object '%s': not a full path" % path)
-
- module, name = path[:dot], path[dot+1:]
- mod = import_module(module)
-
- try:
- obj = getattr(mod, name)
- except AttributeError:
- raise NameError("Module '%s' doesn't define any object named '%s'" % (module, name))
-
- return obj
-
-
-def walk_modules(path):
- """Loads a module and all its submodules from a the given module path and
- returns them. If *any* module throws an exception while importing, that
- exception is thrown back.
-
- For example: walk_modules('scrapy.utils')
- """
-
- mods = []
- mod = import_module(path)
- mods.append(mod)
- if hasattr(mod, '__path__'):
- for _, subpath, ispkg in iter_modules(mod.__path__):
- fullpath = path + '.' + subpath
- if ispkg:
- mods += walk_modules(fullpath)
- else:
- submod = import_module(fullpath)
- mods.append(submod)
- return mods
+from .python import flatten
def extract_regex(regex, text, encoding='utf-8'):
@@ -94,20 +27,3 @@ def extract_regex(regex, text, encoding='utf-8'):
return [replace_entities(s, keep=['lt', 'amp']) for s in strings]
else:
return [replace_entities(unicode(s, encoding), keep=['lt', 'amp']) for s in strings]
-
-
-def md5sum(file):
- """Calculate the md5 checksum of a file-like object without reading its
- whole content in memory.
-
- >>> from io import BytesIO
- >>> md5sum(BytesIO(b'file content to hash'))
- '784406af91dd5a54fbb9c84c2236595a'
- """
- m = hashlib.md5()
- while 1:
- d = file.read(8096)
- if not d:
- break
- m.update(d)
- return m.hexdigest()
diff --git a/selectors/utils/python.py b/selectors/utils/python.py
index 551d337..beb62f0 100644
--- a/selectors/utils/python.py
+++ b/selectors/utils/python.py
@@ -1,19 +1,3 @@
-"""
-This module contains essential stuff that should've come with Python itself ;)
-
-It also contains functions (or functionality) which is in Python versions
-higher than 2.5 which used to be the lowest version supported by Scrapy.
-
-"""
-import os
-import re
-import inspect
-import weakref
-import errno
-import six
-from functools import partial, wraps
-
-
def flatten(x):
"""flatten(sequence) -> list
@@ -34,236 +18,3 @@ def flatten(x):
else:
result.append(el)
return result
-
-
-def unique(list_, key=lambda x: x):
- """efficient function to uniquify a list preserving item order"""
- seen = set()
- result = []
- for item in list_:
- seenkey = key(item)
- if seenkey in seen:
- continue
- seen.add(seenkey)
- result.append(item)
- return result
-
-
-def str_to_unicode(text, encoding=None, errors='strict'):
- """Return the unicode representation of text in the given encoding. Unlike
- .encode(encoding) this function can be applied directly to a unicode
- object without the risk of double-decoding problems (which can happen if
- you don't use the default 'ascii' encoding)
- """
-
- if encoding is None:
- encoding = 'utf-8'
- if isinstance(text, str):
- return text.decode(encoding, errors)
- elif isinstance(text, unicode):
- return text
- else:
- raise TypeError('str_to_unicode must receive a str or unicode object, got %s' % type(text).__name__)
-
-def unicode_to_str(text, encoding=None, errors='strict'):
- """Return the str representation of text in the given encoding. Unlike
- .encode(encoding) this function can be applied directly to a str
- object without the risk of double-decoding problems (which can happen if
- you don't use the default 'ascii' encoding)
- """
-
- if encoding is None:
- encoding = 'utf-8'
- if isinstance(text, unicode):
- return text.encode(encoding, errors)
- elif isinstance(text, str):
- return text
- else:
- raise TypeError('unicode_to_str must receive a unicode or str object, got %s' % type(text).__name__)
-
-def re_rsearch(pattern, text, chunk_size=1024):
- """
- This function does a reverse search in a text using a regular expression
- given in the attribute 'pattern'.
- Since the re module does not provide this functionality, we have to find for
- the expression into chunks of text extracted from the end (for the sake of efficiency).
- At first, a chunk of 'chunk_size' kilobytes is extracted from the end, and searched for
- the pattern. If the pattern is not found, another chunk is extracted, and another
- search is performed.
- This process continues until a match is found, or until the whole file is read.
- In case the pattern wasn't found, None is returned, otherwise it returns a tuple containing
- the start position of the match, and the ending (regarding the entire text).
- """
- def _chunk_iter():
- offset = len(text)
- while True:
- offset -= (chunk_size * 1024)
- if offset <= 0:
- break
- yield (text[offset:], offset)
- yield (text, 0)
-
- pattern = re.compile(pattern) if isinstance(pattern, basestring) else pattern
- for chunk, offset in _chunk_iter():
- matches = [match for match in pattern.finditer(chunk)]
- if matches:
- return (offset + matches[-1].span()[0], offset + matches[-1].span()[1])
- return None
-
-def memoizemethod_noargs(method):
- """Decorator to cache the result of a method (without arguments) using a
- weak reference to its object
- """
- cache = weakref.WeakKeyDictionary()
- @wraps(method)
- def new_method(self, *args, **kwargs):
- if self not in cache:
- cache[self] = method(self, *args, **kwargs)
- return cache[self]
- return new_method
-
-_BINARYCHARS = set(map(chr, range(32))) - set(["\0", "\t", "\n", "\r"])
-
-def isbinarytext(text):
- """Return True if the given text is considered binary, or false
- otherwise, by looking for binary bytes at their chars
- """
- assert isinstance(text, str), "text must be str, got '%s'" % type(text).__name__
- return any(c in _BINARYCHARS for c in text)
-
-def get_func_args(func, stripself=False):
- """Return the argument name list of a callable"""
- if inspect.isfunction(func):
- func_args, _, _, _ = inspect.getargspec(func)
- elif inspect.isclass(func):
- return get_func_args(func.__init__, True)
- elif inspect.ismethod(func):
- return get_func_args(func.__func__, True)
- elif inspect.ismethoddescriptor(func):
- return []
- elif isinstance(func, partial):
- return [x for x in get_func_args(func.func)[len(func.args):]
- if not (func.keywords and x in func.keywords)]
- elif hasattr(func, '__call__'):
- if inspect.isroutine(func):
- return []
- elif getattr(func, '__name__', None) == '__call__':
- return []
- else:
- return get_func_args(func.__call__, True)
- else:
- raise TypeError('%s is not callable' % type(func))
- if stripself:
- func_args.pop(0)
- return func_args
-
-def get_spec(func):
- """Returns (args, kwargs) tuple for a function
- >>> import re
- >>> get_spec(re.match)
- (['pattern', 'string'], {'flags': 0})
-
- >>> class Test(object):
- ... def __call__(self, val):
- ... pass
- ... def method(self, val, flags=0):
- ... pass
-
- >>> get_spec(Test)
- (['self', 'val'], {})
-
- >>> get_spec(Test.method)
- (['self', 'val'], {'flags': 0})
-
- >>> get_spec(Test().method)
- (['self', 'val'], {'flags': 0})
- """
-
- if inspect.isfunction(func) or inspect.ismethod(func):
- spec = inspect.getargspec(func)
- elif hasattr(func, '__call__'):
- spec = inspect.getargspec(func.__call__)
- else:
- raise TypeError('%s is not callable' % type(func))
-
- defaults = spec.defaults or []
-
- firstdefault = len(spec.args) - len(defaults)
- args = spec.args[:firstdefault]
- kwargs = dict(zip(spec.args[firstdefault:], defaults))
- return args, kwargs
-
-def equal_attributes(obj1, obj2, attributes):
- """Compare two objects attributes"""
- # not attributes given return False by default
- if not attributes:
- return False
-
- for attr in attributes:
- # support callables like itemgetter
- if callable(attr):
- if not attr(obj1) == attr(obj2):
- return False
- else:
- # check that objects has attribute
- if not hasattr(obj1, attr):
- return False
- if not hasattr(obj2, attr):
- return False
- # compare object attributes
- if not getattr(obj1, attr) == getattr(obj2, attr):
- return False
- # all attributes equal
- return True
-
-
-class WeakKeyCache(object):
-
- def __init__(self, default_factory):
- self.default_factory = default_factory
- self._weakdict = weakref.WeakKeyDictionary()
-
- def __getitem__(self, key):
- if key not in self._weakdict:
- self._weakdict[key] = self.default_factory(key)
- return self._weakdict[key]
-
-
-def stringify_dict(dct_or_tuples, encoding='utf-8', keys_only=True):
- """Return a (new) dict with the unicode keys (and values if, keys_only is
- False) of the given dict converted to strings. `dct_or_tuples` can be a
- dict or a list of tuples, like any dict constructor supports.
- """
- d = {}
- for k, v in six.iteritems(dict(dct_or_tuples)):
- k = k.encode(encoding) if isinstance(k, unicode) else k
- if not keys_only:
- v = v.encode(encoding) if isinstance(v, unicode) else v
- d[k] = v
- return d
-
-def is_writable(path):
- """Return True if the given path can be written (if it exists) or created
- (if it doesn't exist)
- """
- if os.path.exists(path):
- return os.access(path, os.W_OK)
- else:
- return os.access(os.path.dirname(path), os.W_OK)
-
-def setattr_default(obj, name, value):
- """Set attribute value, but only if it's not already set. Similar to
- setdefault() for dicts.
- """
- if not hasattr(obj, name):
- setattr(obj, name, value)
-
-
-def retry_on_eintr(function, *args, **kw):
- """Run a function and retry it while getting EINTR errors"""
- while True:
- try:
- return function(*args, **kw)
- except IOError as e:
- if e.errno != errno.EINTR:
- raise
--
1.9.1
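The rename of the `Scrapy*` translator classes to `Selector*` in this patch is mechanical, but the translator is what makes `::text` and `::attr()` work: `TranslatorMixin` extends cssselect's CSS-to-XPath compilation. A hedged sketch of what that buys — `css_to_xpath()` is cssselect's own API, and the exact XPath strings it emits vary by cssselect version:

```python
# Sketch: compile CSS (with the ::text / ::attr() pseudo-elements
# handled by TranslatorMixin above) down to XPath via cssselect.
from selectors.csstranslator import SelectorHTMLTranslator

tr = SelectorHTMLTranslator()
print(tr.css_to_xpath('a::attr(href)'))
# roughly: descendant-or-self::a/@href
print(tr.css_to_xpath('p.msg::text'))
# roughly: descendant-or-self::p[... class test ...]/text()
```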
From 7cf971c08ece0007d891045497f2e164b3bf8044 Mon Sep 17 00:00:00 2001
From: Umair Ashraf <umr.ashrf@gmail.com>
Date: Sat, 21 Feb 2015 20:05:30 +0500
Subject: [PATCH 3/5] added selectors specific python package files
---
MANIFEST.in | 2 ++
requirements.txt | 3 +++
setup.py | 28 ++++++++++++++++++++++++++++
3 files changed, 33 insertions(+)
create mode 100644 MANIFEST.in
create mode 100644 requirements.txt
create mode 100644 setup.py
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..2970947
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include README.md
+include MANIFEST.in
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9a0bc80
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+lxml
+w3lib>=1.8.0
+cssselect>=0.9
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c3f8aa0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+import re
+
+from setuptools import setup, find_packages
+
+
+(version, ) = re.findall(r"__version__[^=]*=[^']*[']([^']+)[']",
+ open('selectors/__init__.py').read())
+
+
+setup(
+ name='Selectors',
+ version=version,
+ url='http://github.com/scrapy/selectors',
+ description='Selectors used by Scrapy framework',
+ long_description=open('README.md').read(),
+ author='Selectors developers',
+ maintainer='Scrapy developers',
+ maintainer_email='info@scrapy.org',
+ license='BSD',
+ packages=find_packages(exclude=('tests', 'tests.*')),
+ include_package_data=True,
+ zip_safe=False,
+ install_requires=[
+ 'lxml',
+ 'w3lib>=1.8.0',
+ 'cssselect>=0.9',
+ ],
+)
--
1.9.1
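Note that setup.py reads the version out of `selectors/__init__.py` with a regex rather than importing the package, presumably so installation never has to import the package (and its lxml dependency). A standalone check of that regex:

```python
# Stand-alone check of the version-extraction regex from setup.py above.
import re

init_src = "__version__ = '0.0.1'\n"  # stand-in for selectors/__init__.py
(version, ) = re.findall(r"__version__[^=]*=[^']*[']([^']+)[']", init_src)
print(version)  # -> 0.0.1
```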
From 109c2e096de49494e745c885df8901698b58d11e Mon Sep 17 00:00:00 2001
From: Umair Ashraf <umr.ashrf@gmail.com>
Date: Sat, 21 Feb 2015 20:24:31 +0500
Subject: [PATCH 4/5] removed scrapy dependent tests and changed code to suit
selectors package
---
tests/__init__.py | 14 ---
tests/test_selector.py | 222 ++++-------------------------------
tests/test_selector_csstranslator.py | 12 +-
3 files changed, 31 insertions(+), 217 deletions(-)
delete mode 100644 tests/__init__.py
diff --git a/tests/__init__.py b/tests/__init__.py
deleted file mode 100644
index 54e79b3..0000000
--- a/tests/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""
-tests: this package contains all Scrapy unittests
-
-see http://doc.scrapy.org/en/latest/contributing.html#running-tests
-"""
-
-import os
-
-tests_datadir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sample_data')
-
-def get_testdata(*paths):
- """Return test data"""
- path = os.path.join(tests_datadir, *paths)
- return open(path, 'rb').read()
diff --git a/tests/test_selector.py b/tests/test_selector.py
index 6fbb451..91c7d31 100644
--- a/tests/test_selector.py
+++ b/tests/test_selector.py
@@ -1,11 +1,10 @@
import re
import warnings
import weakref
+
from twisted.trial import unittest
-from scrapy.exceptions import ScrapyDeprecationWarning
-from scrapy.http import TextResponse, HtmlResponse, XmlResponse
-from scrapy.selector import Selector
-from scrapy.selector.lxmlsel import XmlXPathSelector, HtmlXPathSelector, XPathSelector
+
+from selectors import Selector
class SelectorTestCase(unittest.TestCase):
@@ -15,8 +14,7 @@ class SelectorTestCase(unittest.TestCase):
def test_simple_selection(self):
"""Simple selector tests"""
body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>"
- response = TextResponse(url="http://example.com", body=body)
- sel = self.sscls(response)
+ sel = self.sscls(url="http://example.com", text=body)
xl = sel.xpath('//input')
self.assertEqual(2, len(xl))
@@ -38,8 +36,7 @@ class SelectorTestCase(unittest.TestCase):
def test_representation_slice(self):
body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
- response = TextResponse(url="http://example.com", body=body, encoding='utf8')
- sel = self.sscls(response)
+ sel = self.sscls(url="http://example.com", text=body)
self.assertEqual(
map(repr, sel.xpath('//input/@name')),
@@ -48,8 +45,7 @@ class SelectorTestCase(unittest.TestCase):
def test_representation_unicode_query(self):
body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
- response = TextResponse(url="http://example.com", body=body, encoding='utf8')
- sel = self.sscls(response)
+ sel = self.sscls(url="http://example.com", text=body)
self.assertEqual(
map(repr, sel.xpath(u'//input[@value="\xa9"]/@value')),
["<Selector xpath=u'//input[@value=\"\\xa9\"]/@value' data=u'\\xa9'>"]
@@ -57,8 +53,7 @@ class SelectorTestCase(unittest.TestCase):
def test_select_unicode_query(self):
body = u"<p><input name='\xa9' value='1'/></p>"
- response = TextResponse(url="http://example.com", body=body, encoding='utf8')
- sel = self.sscls(response)
+ sel = self.sscls(url="http://example.com", text=body)
self.assertEqual(sel.xpath(u'//input[@name="\xa9"]/@value').extract(), [u'1'])
def test_list_elements_type(self):
@@ -69,8 +64,7 @@ class SelectorTestCase(unittest.TestCase):
def test_boolean_result(self):
body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>"
- response = TextResponse(url="http://example.com", body=body)
- xs = self.sscls(response)
+ xs = self.sscls(url="http://example.com", text=body)
self.assertEquals(xs.xpath("//input[@name='a']/@name='a'").extract(), [u'1'])
self.assertEquals(xs.xpath("//input[@name='a']/@name='n'").extract(), [u'0'])
@@ -86,18 +80,6 @@ class SelectorTestCase(unittest.TestCase):
self.assertEqual(xs.xpath("//div").extract(),
[u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
- def test_flavor_detection(self):
- text = '<div><img src="a.jpg"><p>Hello</div>'
- sel = self.sscls(XmlResponse('http://example.com', body=text))
- self.assertEqual(sel.type, 'xml')
- self.assertEqual(sel.xpath("//div").extract(),
- [u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
-
- sel = self.sscls(HtmlResponse('http://example.com', body=text))
- self.assertEqual(sel.type, 'html')
- self.assertEqual(sel.xpath("//div").extract(),
- [u'<div><img src="a.jpg"><p>Hello</p></div>'])
-
def test_nested_selectors(self):
"""Nested selector tests"""
body = """<body>
@@ -113,8 +95,7 @@ class SelectorTestCase(unittest.TestCase):
</div>
</body>"""
- response = HtmlResponse(url="http://example.com", body=body)
- x = self.sscls(response)
+ x = self.sscls(url="http://example.com", text=body)
divtwo = x.xpath('//div[@class="two"]')
self.assertEqual(divtwo.xpath("//li").extract(),
["<li>one</li>", "<li>two</li>", "<li>four</li>", "<li>five</li>", "<li>six</li>"])
@@ -145,8 +126,7 @@ class SelectorTestCase(unittest.TestCase):
</test>
"""
- response = XmlResponse(url="http://example.com", body=body)
- x = self.sscls(response)
+ x = self.sscls(url="http://example.com", text=body, type="xml")
x.register_namespace("somens", "http://scrapy.org")
self.assertEqual(x.xpath("//somens:a/text()").extract(),
@@ -162,8 +142,7 @@ class SelectorTestCase(unittest.TestCase):
<p:SecondTestTag><material>iron</material><price>90</price><p:name>Dried Rose</p:name></p:SecondTestTag>
</BrowseNode>
"""
- response = XmlResponse(url="http://example.com", body=body)
- x = self.sscls(response)
+ x = self.sscls(url="http://example.com", text=body, type="xml")
x.register_namespace("xmlns", "http://webservices.amazon.com/AWSECommerceService/2005-10-05")
x.register_namespace("p", "http://www.scrapy.org/product")
x.register_namespace("b", "http://somens.com")
@@ -184,8 +163,7 @@ class SelectorTestCase(unittest.TestCase):
</ul>
Age: 20
</div>"""
- response = HtmlResponse(url="http://example.com", body=body)
- x = self.sscls(response)
+ x = self.sscls(url="http://example.com", text=body)
name_re = re.compile("Name: (\w+)")
self.assertEqual(x.xpath("//ul/li").re(name_re),
@@ -193,12 +171,6 @@ class SelectorTestCase(unittest.TestCase):
self.assertEqual(x.xpath("//ul/li").re("Age: (\d+)"),
["10", "20"])
- def test_re_intl(self):
- body = """<div>Evento: cumplea\xc3\xb1os</div>"""
- response = HtmlResponse(url="http://example.com", body=body, encoding='utf-8')
- x = self.sscls(response)
- self.assertEqual(x.xpath("//div").re("Evento: (\w+)"), [u'cumplea\xf1os'])
-
def test_selector_over_text(self):
hs = self.sscls(text='<root>lala</root>')
self.assertEqual(hs.extract(), u'<html><body><root>lala</root></body></html>')
@@ -207,8 +179,7 @@ class SelectorTestCase(unittest.TestCase):
self.assertEqual(xs.xpath('.').extract(), [u'<root>lala</root>'])
def test_invalid_xpath(self):
- response = XmlResponse(url="http://example.com", body="<html></html>")
- x = self.sscls(response)
+ x = self.sscls(url="http://example.com", text="<html></html>")
xpath = "//test[@foo='bar]"
try:
x.xpath(xpath)
@@ -219,43 +190,16 @@ class SelectorTestCase(unittest.TestCase):
else:
raise AssertionError("A invalid XPath does not raise an exception")
- def test_http_header_encoding_precedence(self):
- # u'\xa3' = pound symbol in unicode
- # u'\xc2\xa3' = pound symbol in utf-8
- # u'\xa3' = pound symbol in latin-1 (iso-8859-1)
-
- meta = u'<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
- head = u'<head>' + meta + u'</head>'
- body_content = u'<span id="blank">\xa3</span>'
- body = u'<body>' + body_content + u'</body>'
- html = u'<html>' + head + body + u'</html>'
- encoding = 'utf-8'
- html_utf8 = html.encode(encoding)
-
- headers = {'Content-Type': ['text/html; charset=utf-8']}
- response = HtmlResponse(url="http://example.com", headers=headers, body=html_utf8)
- x = self.sscls(response)
- self.assertEquals(x.xpath("//span[@id='blank']/text()").extract(),
- [u'\xa3'])
-
def test_empty_bodies(self):
# shouldn't raise errors
- r1 = TextResponse('http://www.example.com', body='')
- self.sscls(r1).xpath('//text()').extract()
+ self.sscls(url='http://www.example.com', text='').xpath('//text()').extract()
def test_null_bytes(self):
# shouldn't raise errors
- r1 = TextResponse('http://www.example.com', \
- body='<root>pre\x00post</root>', \
- encoding='utf-8')
- self.sscls(r1).xpath('//text()').extract()
-
- def test_badly_encoded_body(self):
- # \xe9 alone isn't valid utf8 sequence
- r1 = TextResponse('http://www.example.com', \
- body='<html><p>an Jos\xe9 de</p><html>', \
- encoding='utf-8')
- self.sscls(r1).xpath('//text()').extract()
+ self.sscls(url='http://www.example.com',
+ text='<root>pre\x00post</root>',
+ type='xml') \
+ .xpath('//text()').extract()
def test_select_on_unevaluable_nodes(self):
r = self.sscls(text=u'<span class="big">some text</span>')
@@ -284,13 +228,6 @@ class SelectorTestCase(unittest.TestCase):
self.assertEquals(x2.extract(), [u'<b>Options:</b>'])
test_nested_select_on_text_nodes.skip = "Text nodes lost parent node reference in lxml"
- def test_weakref_slots(self):
- """Check that classes are using slots and are weak-referenceable"""
- x = self.sscls()
- weakref.ref(x)
- assert not hasattr(x, '__dict__'), "%s does not use __slots__" % \
- x.__class__.__name__
-
def test_remove_namespaces(self):
xml = """<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/">
@@ -298,7 +235,7 @@ class SelectorTestCase(unittest.TestCase):
<link type="application/atom+xml">
</feed>
"""
- sel = self.sscls(XmlResponse("http://example.com/feed.atom", body=xml))
+ sel = self.sscls(url="http://example.com/feed.atom", text=xml, type="xml")
self.assertEqual(len(sel.xpath("//link")), 0)
sel.remove_namespaces()
self.assertEqual(len(sel.xpath("//link")), 2)
@@ -310,7 +247,7 @@ class SelectorTestCase(unittest.TestCase):
<link atom:type="application/atom+xml">
</feed>
"""
- sel = self.sscls(XmlResponse("http://example.com/feed.atom", body=xml))
+ sel = self.sscls(url="http://example.com/feed.atom", text=xml, type="xml")
self.assertEqual(len(sel.xpath("//link/@type")), 0)
sel.remove_namespaces()
self.assertEqual(len(sel.xpath("//link/@type")), 2)
@@ -334,17 +271,15 @@ class SelectorTestCase(unittest.TestCase):
</div>
</body>"""
- response = HtmlResponse(url="http://example.com", body=body)
-
# .getparent() is available for text nodes and attributes
# only when smart_strings are on
- x = self.sscls(response)
+ x = self.sscls(url="http://example.com", text=body)
li_text = x.xpath('//li/text()')
self.assertFalse(any(map(lambda e: hasattr(e._root, 'getparent'), li_text)))
div_class = x.xpath('//div/@class')
self.assertFalse(any(map(lambda e: hasattr(e._root, 'getparent'), div_class)))
- x = SmartStringsSelector(response)
+ x = SmartStringsSelector(url="http://example.com", text=body)
li_text = x.xpath('//li/text()')
self.assertTrue(all(map(lambda e: hasattr(e._root, 'getparent'), li_text)))
div_class = x.xpath('//div/@class')
@@ -355,116 +290,11 @@ class SelectorTestCase(unittest.TestCase):
'<!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM '\
'"file:///etc/passwd" >]><foo>&xxe;</foo>'
- response = XmlResponse('http://example.com', body=malicious_xml)
- sel = self.sscls(response=response)
+ sel = self.sscls(url='http://example.com', text=malicious_xml, type="xml")
self.assertEqual(sel.extract(), '<foo>&xxe;</foo>')
-class DeprecatedXpathSelectorTest(unittest.TestCase):
-
- text = '<div><img src="a.jpg"><p>Hello</div>'
-
- def test_warnings_xpathselector(self):
- cls = XPathSelector
- with warnings.catch_warnings(record=True) as w:
- class UserClass(cls):
- pass
-
- # subclassing must issue a warning
- self.assertEqual(len(w), 1, str(cls))
- self.assertIn('scrapy.Selector', str(w[0].message))
-
- # subclass instance doesn't issue a warning
- usel = UserClass(text=self.text)
- self.assertEqual(len(w), 1)
-
- # class instance must issue a warning
- sel = cls(text=self.text)
- self.assertEqual(len(w), 2, str((cls, [x.message for x in w])))
- self.assertIn('scrapy.Selector', str(w[1].message))
-
- # subclass and instance checks
- self.assertTrue(issubclass(cls, Selector))
- self.assertTrue(isinstance(sel, Selector))
- self.assertTrue(isinstance(usel, Selector))
-
- def test_warnings_xmlxpathselector(self):
- cls = XmlXPathSelector
- with warnings.catch_warnings(record=True) as w:
- class UserClass(cls):
- pass
-
- # subclassing must issue a warning
- self.assertEqual(len(w), 1, str(cls))
- self.assertIn('scrapy.Selector', str(w[0].message))
-
- # subclass instance doesn't issue a warning
- usel = UserClass(text=self.text)
- self.assertEqual(len(w), 1)
-
- # class instance must issue a warning
- sel = cls(text=self.text)
- self.assertEqual(len(w), 2, str((cls, [x.message for x in w])))
- self.assertIn('scrapy.Selector', str(w[1].message))
-
- # subclass and instance checks
- self.assertTrue(issubclass(cls, Selector))
- self.assertTrue(issubclass(cls, XPathSelector))
- self.assertTrue(isinstance(sel, Selector))
- self.assertTrue(isinstance(usel, Selector))
- self.assertTrue(isinstance(sel, XPathSelector))
- self.assertTrue(isinstance(usel, XPathSelector))
-
- def test_warnings_htmlxpathselector(self):
- cls = HtmlXPathSelector
- with warnings.catch_warnings(record=True) as w:
- class UserClass(cls):
- pass
-
- # subclassing must issue a warning
- self.assertEqual(len(w), 1, str(cls))
- self.assertIn('scrapy.Selector', str(w[0].message))
-
- # subclass instance doesn't issue a warning
- usel = UserClass(text=self.text)
- self.assertEqual(len(w), 1)
-
- # class instance must issue a warning
- sel = cls(text=self.text)
- self.assertEqual(len(w), 2, str((cls, [x.message for x in w])))
- self.assertIn('scrapy.Selector', str(w[1].message))
-
- # subclass and instance checks
- self.assertTrue(issubclass(cls, Selector))
- self.assertTrue(issubclass(cls, XPathSelector))
- self.assertTrue(isinstance(sel, Selector))
- self.assertTrue(isinstance(usel, Selector))
- self.assertTrue(isinstance(sel, XPathSelector))
- self.assertTrue(isinstance(usel, XPathSelector))
-
- def test_xpathselector(self):
- with warnings.catch_warnings(record=True):
- hs = XPathSelector(text=self.text)
- self.assertEqual(hs.select("//div").extract(),
- [u'<div><img src="a.jpg"><p>Hello</p></div>'])
- self.assertRaises(RuntimeError, hs.css, 'div')
-
- def test_htmlxpathselector(self):
- with warnings.catch_warnings(record=True):
- hs = HtmlXPathSelector(text=self.text)
- self.assertEqual(hs.select("//div").extract(),
- [u'<div><img src="a.jpg"><p>Hello</p></div>'])
- self.assertRaises(RuntimeError, hs.css, 'div')
-
- def test_xmlxpathselector(self):
- with warnings.catch_warnings(record=True):
- xs = XmlXPathSelector(text=self.text)
- self.assertEqual(xs.select("//div").extract(),
- [u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
- self.assertRaises(RuntimeError, xs.css, 'div')
-
-
class ExsltTestCase(unittest.TestCase):
sscls = Selector
@@ -479,8 +309,7 @@ class ExsltTestCase(unittest.TestCase):
<a href="http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml">EXSLT match example</a>
</div>
"""
- response = TextResponse(url="http://example.com", body=body)
- sel = self.sscls(response)
+ sel = self.sscls(url="http://example.com", text=body)
# re:test()
self.assertEqual(
@@ -557,8 +386,7 @@ class ExsltTestCase(unittest.TestCase):
</div>
</div>
"""
- response = TextResponse(url="http://example.com", body=body)
- sel = self.sscls(response)
+ sel = self.sscls(url="http://example.com", text=body)
self.assertEqual(
sel.xpath('''//div[@itemtype="http://schema.org/Event"]
diff --git a/tests/test_selector_csstranslator.py b/tests/test_selector_csstranslator.py
index 7ef9003..b648320 100644
--- a/tests/test_selector_csstranslator.py
+++ b/tests/test_selector_csstranslator.py
@@ -2,9 +2,10 @@
Selector tests for cssselect backend
"""
from twisted.trial import unittest
-from scrapy.http import HtmlResponse
-from scrapy.selector.csstranslator import ScrapyHTMLTranslator
-from scrapy.selector import Selector
+
+from selectors import Selector
+from selectors.csstranslator import SelectorHTMLTranslator
+
from cssselect.parser import SelectorSyntaxError
from cssselect.xpath import ExpressionError
@@ -47,7 +48,7 @@ HTMLBODY = '''
class TranslatorMixinTest(unittest.TestCase):
- tr_cls = ScrapyHTMLTranslator
+ tr_cls = SelectorHTMLTranslator
def setUp(self):
self.tr = self.tr_cls()
@@ -119,8 +120,7 @@ class CSSSelectorTest(unittest.TestCase):
sscls = Selector
def setUp(self):
- self.htmlresponse = HtmlResponse('http://example.com', body=HTMLBODY)
- self.sel = self.sscls(self.htmlresponse)
+ self.sel = self.sscls(url='http://example.com', text=HTMLBODY)
def x(self, *a, **kw):
return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
--
1.9.1
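The retained XXE test above (the malicious `<!ENTITY xxe SYSTEM "file:///etc/passwd">` document serializing back as `<foo>&xxe;</foo>`) is the observable effect of `SafeXMLParser` from patch 2 setting `resolve_entities=False`. A hedged standalone demonstration of that underlying lxml behaviour:

```python
# Sketch of the behaviour the XXE test asserts: with
# resolve_entities=False, lxml keeps the entity reference unexpanded.
from lxml import etree

malicious = (b'<!DOCTYPE foo [ <!ELEMENT foo ANY >'
             b'<!ENTITY xxe SYSTEM "file:///etc/passwd" >]>'
             b'<foo>&xxe;</foo>')
safe_parser = etree.XMLParser(resolve_entities=False)
root = etree.fromstring(malicious, parser=safe_parser)
print(etree.tostring(root))  # <foo>&xxe;</foo>
```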
From eab029568b37d564bc8bbdb5b836cf0976a39cd1 Mon Sep 17 00:00:00 2001
From: Umair Ashraf <umr.ashrf@gmail.com>
Date: Sat, 21 Feb 2015 20:25:01 +0500
Subject: [PATCH 5/5] added selectors tests specific support files
---
pytest.ini | 4 ++++
tests/requirements.txt | 3 +++
tox.ini | 14 ++++++++++++++
3 files changed, 21 insertions(+)
create mode 100644 pytest.ini
create mode 100644 tests/requirements.txt
create mode 100644 tox.ini
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..cc48090
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+python_files=test_*.py __init__.py
+addopts = --doctest-modules --assert=plain
+twisted = 1
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..18ae516
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,3 @@
+Twisted>=10.0.0
+pytest-twisted
+mock
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..617df26
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,14 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27
+
+[testenv]
+deps =
+ -rrequirements.txt
+ -rtests/requirements.txt
+commands =
+ py.test {posargs:selectors tests}
--
1.9.1
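The shell script below is what produces the standalone repository these patches apply to: it uses `git filter-branch` to split `scrapy/selector`, the needed `scrapy/utils` modules, and the selector tests out of Scrapy's history, stitches the results onto one branch, and then applies the patches above with `git am`.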
#!/bin/bash
# start over: remove branches left behind by a previous run
git checkout master
git branch -D selectors selector-code utils-code tests-code
# split scrapy/selector dir to selector-code branch
git checkout -b selector-code
git filter-branch -f --prune-empty \
--subdirectory-filter scrapy/selector -- selector-code
# mv files to selectors/ dir without new commit
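# (`git ls-files -s` prints "<mode> <sha> <stage>\t<path>"; the sed below
#  inserts "selectors/" right after the tab, prefixing every path, and
#  `git update-index --index-info` writes that listing back as the index)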
git filter-branch -f \
--index-filter '
git ls-files -s \
| sed "s-\t-&selectors/-" \
| GIT_INDEX_FILE=$GIT_INDEX_FILE.new git update-index --index-info \
&& mv $GIT_INDEX_FILE.new $GIT_INDEX_FILE'
# now we need to split utils
git checkout master
# split scrapy/utils dir to utils-code branch
git checkout -b utils-code
git filter-branch -f --prune-empty \
--subdirectory-filter scrapy/utils -- utils-code
# only keep required utils files
git filter-branch -f \
--prune-empty \
--index-filter '
git ls-tree -z -r --name-only --full-tree $GIT_COMMIT \
| grep -z -v "^__init__.py$" \
| grep -z -v "^decorator.py$" \
| grep -z -v "^misc.py$" \
| grep -z -v "^python.py$" \
| xargs -0 -r git rm --cached -r
' \
-- \
utils-code
# mv files to selectors/utils/ dir without new commit
git filter-branch -f \
--index-filter '
git ls-files -s \
| sed "s-\t-&selectors/utils/-" \
| GIT_INDEX_FILE=$GIT_INDEX_FILE.new git update-index --index-info \
&& mv $GIT_INDEX_FILE.new $GIT_INDEX_FILE'
# now we need to split tests
git checkout master
# split tests dir to tests-code branch
git checkout -b tests-code
git filter-branch -f --prune-empty \
--subdirectory-filter tests -- tests-code
# only keep required tests files
git filter-branch -f \
--prune-empty \
--index-filter '
git ls-tree -z -r --name-only --full-tree $GIT_COMMIT \
| grep -z -v "^__init__.py$" \
| grep -z -v "^test_selector.py$" \
| grep -z -v "^test_selector_csstranslator.py$" \
| xargs -0 -r git rm --cached -r
' \
-- \
tests-code
# mv files to tests/ dir without new commit
git filter-branch -f \
--index-filter '
git ls-files -s \
| sed "s-\t-&tests/-" \
| GIT_INDEX_FILE=$GIT_INDEX_FILE.new git update-index --index-info \
&& mv $GIT_INDEX_FILE.new $GIT_INDEX_FILE'
# centralized branch for all selectors code
git checkout --orphan selectors
git rm -r -f .
# merge and rebase separate branches
git merge selector-code
git rebase utils-code
git rebase tests-code
# clean up the temporary branches
git branch -D selector-code utils-code tests-code
# now we can apply selectors patches
for f in *.patch; do
    git am < "$f"
done
# now we can remove selectors from scrapy
# references
# http://git-scm.com/docs/git-filter-branch
# http://git-scm.com/docs/git-ls-tree
# examples
# https://stackoverflow.com/questions/359424/detach-subdirectory-into-separate-git-repository
# https://github.com/apenwarr/git-subtree/blob/master/git-subtree.txt
# https://stackoverflow.com/questions/6403715/git-how-to-split-off-library-from-project-filter-branch-subtree?rq=1
# https://stackoverflow.com/questions/5998987/splitting-a-set-of-files-within-a-git-repo-into-their-own-repository-preserving
# https://www.kernel.org/pub/software/scm/git/docs/git-filter-branch.html
# http://stackoverflow.com/a/7396584