# sente/python_snippets.py

## python_snippets.py
#!/usr/bin/python

# from reddit --------------------------------------------
def traceit(fn):
    """Decorator that reports every completed call of ``fn`` on stderr.

    The wrapper forwards all positional and keyword arguments to ``fn``,
    then writes the function object, the arguments and the return value to
    ``sys.stderr`` and hands the return value back unchanged.  The report is
    produced after the call returns, so nothing is written when ``fn``
    raises.
    """
    import sys

    def new_fn(*args, **kwargs):
        result = fn(*args, **kwargs)
        report = "Fn: %s; a=%s; kw=%s\nRet: %s\n" % (fn, args, kwargs, result)
        sys.stderr.write(report)
        return result

    return new_fn


# from reddit --------------------------------------------
def to_csv(table):
    """Render ``table`` as CSV text.

    ``table`` is an iterable of rows and each row is an iterable of strings.
    Cells are joined with commas and rows with a newline; no trailing
    newline is added.

    A cell is wrapped in double quotes (with embedded double quotes doubled)
    when it contains a comma, a double quote, a line feed or a carriage
    return.  All other cells are emitted untouched.
    """
    def quote_commas(x):
        # A bare double quote or carriage return inside an unquoted field
        # yields malformed CSV, so those trigger quoting as well.
        if ',' in x or '"' in x or '\n' in x or '\r' in x:
            return u'"%s"' % x.replace('"', '""')
        return x

    return u"\n".join(u','.join(quote_commas(y) for y in x) for x in table)


# from reddit --------------------------------------------
def get_title(url):
    """Fetch ``url`` and return the utf-8 encoded contents of its <title>.

    Returns None when ``url`` is empty or does not start with 'http://',
    when no usable <title> is found, or when fetching/parsing raises an
    ordinary exception (best effort).
    """
    # Reject unusable URLs before paying for the third-party imports.
    if not url or not url.startswith('http://'):
        return None

    from urllib2 import urlopen
    from BeautifulSoup import BeautifulSoup

    try:
        # if we don't find it in the first kb of the resource, we
        # probably won't find it
        opener = urlopen(url, timeout=15)
        try:
            text = opener.read(1024)
        finally:
            # Release the connection even when the read fails.
            opener.close()

        bs = BeautifulSoup(text)
        if not bs:
            return None

        title_bs = bs.first('title')

        # NOTE(review): the `.children` test is kept exactly as written;
        # confirm what it evaluates to for the installed BeautifulSoup
        # version.
        if not title_bs or title_bs.children:
            return None

        return title_bs.string.encode('utf-8')
    except Exception:
        # Best effort: any ordinary failure means "no title".
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return None


# from reddit --------------------------------------------
def timeit(func):
    """Run ``func()`` and return ``(run_time_in_seconds, result)``.

    ``func`` is called with no arguments.  The elapsed time is the
    difference of two ``time.time()`` readings taken around the call.
    """
    # Imported here because the module does not import `time` at top level.
    import time

    before = time.time()
    res = func()
    return (time.time() - before, res)

# from reddit --------------------------------------------
def lineno():
    """Print the current time and the caller's line number, tab-separated.

    The line number is read from the frame that called this function.
    Nothing is returned.
    """
    import inspect
    # Imported here because the module does not import `datetime` at top level.
    from datetime import datetime

    caller = inspect.currentframe().f_back
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("%s\t%s" % (datetime.now(), caller.f_lineno))

# from reddit --------------------------------------------
def IteratorFilter(iterator, fn):
    """Lazily yield the items of ``iterator`` for which ``fn(item)`` is true."""
    for item in iterator:
        if not fn(item):
            continue
        yield item

# from reddit --------------------------------------------
def UniqueIterator(iterator, key = lambda x: x):
    """
    Wrap ``iterator`` so that only the first occurrence of each entry is
    produced.  Two entries count as the same when ``key`` returns equal
    values for them; the keys are remembered in a set, so they must be
    hashable.

    #[u for u in (UniqueIterator([0,2,4,3,1,5,1,3,5,1,9,5]))]
    #[0, 2, 4, 3, 1, 5, 9]
    """
    seen_keys = set()

    def is_first_occurrence(entry):
        entry_key = key(entry)
        if entry_key not in seen_keys:
            seen_keys.add(entry_key)
            return True
        return False

    return IteratorFilter(iterator, is_first_occurrence)

#def yield_content(somedir):
#    "yield the lines of all python files within 'somedir'"
#
#    files = [os.path.join(somedir,f) for f in os.listdir(somedir) if  f.endswith(".py")]
#    for f in files:
#        for line in open(f).readlines():
#            yield line.rstrip("\r\n")
#
#
#[u for u in UniqueIterator(IteratorFilter(yield_content('/home/stu/'),lambda x: x.find('import')==0))]
#['import sys',
# 'import sets',
# 'import itertools',
# 'import pprint',
# 'import re',
# 'import datetime',

# from reddit --------------------------------------------
def in_chunks(it, size=25):
    """
    Lazily split the iterable ``it`` into lists of at most ``size`` items.

    Every yielded list has ``size`` items except possibly the last one,
    which holds whatever is left over.  An empty iterable yields nothing.

    for abc in in_chunks("ABCDEFGHIJKLMNOPQRSTUVWXYZ",size=3):
        print(abc)

    > ['A', 'B', 'C']
    > ['D', 'E', 'F']
    > ['G', 'H', 'I']
    > ['J', 'K', 'L']
    > ['M', 'N', 'O']
    > ['P', 'Q', 'R']
    > ['S', 'T', 'U']
    > ['V', 'W', 'X']
    > ['Y', 'Z']

    """
    chunk = []
    # A plain for loop replaces the Python-2-only `it.next()` calls.
    for item in it:
        chunk.append(item)
        if len(chunk) >= size:
            yield chunk
            chunk = []
    # Emit the trailing partial chunk, if any.
    if chunk:
        yield chunk


# http://stackoverflow.com/questions/101268/hidden-features-of-python/116391#116391
class AttrDict(dict):
    """
    Dictionary whose items can also be read, written and deleted as
    attributes.

    person = AttrDict({'name': 'John Doe', 'age': 66})
    print(person['name'])
    print(person.name)

    person.name = 'Frodo G'
    print(person.name)

    del person.age

    print(person)
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, so dict methods
        # keep working.
        if name in self:
            return self[name]
        raise AttributeError('%s not found' % name)

    def __setattr__(self, name, value):
        # Every attribute assignment is stored as a dictionary item.
        self[name] = value

    def __delattr__(self, name):
        # Deleting a missing attribute raises KeyError from the dict delete.
        del self[name]


#>>> horses = [1, 2, 3, 4]
#>>> races = itertools.permutations(horses)
#>>> print(races)
#<itertools.permutations object at 0xb754f1dc>
#>>> print(list(itertools.permutations(horses)))
#[(1, 2, 3, 4),
# (1, 2, 4, 3),
# (1, 3, 2, 4),
# (1, 3, 4, 2),
# (1, 4, 2, 3),
# (1, 4, 3, 2),
# (2, 1, 3, 4),
# (2, 1, 4, 3),
# (2, 3, 1, 4),
# (2, 3, 4, 1),
# (2, 4, 1, 3),
# (2, 4, 3, 1),
# (3, 1, 2, 4),
# (3, 1, 4, 2),
# (3, 2, 1, 4),
# (3, 2, 4, 1),
# (3, 4, 1, 2),
# (3, 4, 2, 1),
# (4, 1, 2, 3),
# (4, 1, 3, 2),
# (4, 2, 1, 3),
# (4, 2, 3, 1),
# (4, 3, 1, 2),
# (4, 3, 2, 1)]
#


# from IPython --------------------------------------------
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    With empty ``txt`` the result is ``mark`` repeated and cut to exactly
    ``width`` characters.  Otherwise ``txt`` is surrounded by one space and
    an equal number of ``mark`` repetitions on each side; when ``txt`` is
    too long for ``width`` no marks are added.

    :Examples:

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if not txt:
        return (mark*width)[:width]
    # Floor division keeps the repeat count an integer under true division.
    nmark = (width-len(txt)-2)//len(mark)//2
    if nmark < 0:
        nmark = 0
    marks = mark*nmark
    return '%s %s %s' % (marks,txt,marks)


# from IPython --------------------------------------------
def page_file(fname,start = 0, pager_cmd = None):
    """Page a file, using an optional pager command and starting line.

    The pager command line is built with ``get_pager_cmd`` and
    ``get_pager_start`` and run through ``xsys`` (all defined outside this
    block; presumably IPython pager helpers -- verify against their
    definitions).  When TERM is 'emacs' or 'dumb', or anything in that step
    fails, the file is read and passed to ``page`` instead.  If that fails
    too, a message is printed.
    """
    # Imported here because the module does not import `os` at top level.
    import os

    pager_cmd = get_pager_cmd(pager_cmd)
    pager_cmd += ' ' + get_pager_start(pager_cmd,start)

    try:
        if os.environ['TERM'] in ['emacs','dumb']:
            # Raised only to jump to the fallback below.
            raise EnvironmentError
        xsys(pager_cmd + ' ' + fname)
    except Exception:
        try:
            if start > 0:
                start -= 1
            # Close the file promptly instead of leaking the handle.
            with open(fname) as handle:
                contents = handle.read()
            page(contents,start)
        except Exception:
            # Same output as the Python 2 form: print 'Unable to show file',`fname`
            print('Unable to show file ' + repr(fname))


# from IPython --------------------------------------------
def chop(seq,size):
    """Chop a sequence into chunks of the given size.

    Returns a list of slices ``seq[i:i+size]`` taken at offsets
    0, size, 2*size, ...; the last slice may be shorter.  ``seq`` must
    support ``len()`` and slicing.
    """
    # A list comprehension over range() returns a list on Python 2 and 3.
    return [seq[i:i+size] for i in range(0,len(seq),size)]


# from IPython --------------------------------------------
def uniq_stable(elems):
    """uniq_stable(elems) -> list

    Collect the distinct elements of an iterable into a list, keeping them
    in the order of their first appearance.

    Membership is tracked with a hash-based container for speed, so every
    element of the input must be hashable."""

    seen = set()
    ordered = []
    for item in elems:
        if item in seen:
            continue
        ordered.append(item)
        seen.add(item)
    return ordered


# from IPython --------------------------------------------
def flatten(seq):
    """Concatenate the items of every sub-sequence of ``seq`` into one list.

    Only one level of nesting is removed; deeper nesting is left as is."""

    flat = []
    for subseq in seq:
        for x in subseq:
            flat.append(x)
    return flat

# from IPython --------------------------------------------
def get_slice(seq,start=0,stop=None,step=1):
    """Get a slice of a sequence with variable step. Specify start,stop,step.

    Returns a list of ``seq[i]`` for every index ``i`` in
    ``range(start, stop, step)``.  ``stop`` defaults to ``len(seq)``.
    """
    if stop is None:
        stop = len(seq)
    # A list comprehension over range() returns a list on Python 2 and 3.
    return [seq[i] for i in range(start,stop,step)]

# from IPython --------------------------------------------
def chop(seq,size):
    """Chop a sequence into chunks of the given size.

    Returns a list of slices ``seq[i:i+size]`` taken at offsets
    0, size, 2*size, ...; the last slice may be shorter.  ``seq`` must
    support ``len()`` and slicing.
    """
    # NOTE: this redefines the identical `chop` that appears earlier in
    # this file.
    # A list comprehension over range() returns a list on Python 2 and 3.
    return [seq[i:i+size] for i in range(0,len(seq),size)]

# from IPython --------------------------------------------
try:
    from itertools import izip_longest
except ImportError:
    # Python 3 provides the same function under the name zip_longest.
    from itertools import zip_longest as izip_longest
# Demonstration only: the value is discarded; the expected result is shown
# in the comment that follows.
[x for x in izip_longest(range(5),range(10),range(15),fillvalue=None)]
#[(0, 0, 0),
# (1, 1, 1),
# (2, 2, 2),
# (3, 3, 3),
# (4, 4, 4),
# (None, 5, 5),
# (None, 6, 6),
# (None, 7, 7),
# (None, 8, 8),
# (None, 9, 9),
# (None, None, 10),
# (None, None, 11),
# (None, None, 12),
# (None, None, 13),
# (None, None, 14)]


# from IPython --------------------------------------------
class countCalls(object):
    """Decorator class that counts how often the wrapped function is called.

    Decorating a function replaces it with a ``countCalls`` instance.  The
    instance forwards every call to the original function and returns its
    result, while ``timesCalled`` holds the number of calls made so far.

    >>> @countCalls
    ... def doNothing():
    ...     pass
    >>> doNothing()
    >>> doNothing()
    >>> print(doNothing.timesCalled)
    2
    """

    def __init__(self, functionToTrack):
        self.timesCalled = 0
        self.functionToTrack = functionToTrack

    def __call__(self, *args, **kwargs):
        # The call is counted before it is forwarded, so a call that raises
        # is counted too.
        self.timesCalled = self.timesCalled + 1
        tracked = self.functionToTrack
        return tracked(*args, **kwargs)
	#!/usr/bin/python

	# from reddit --------------------------------------------
	def traceit(fn):
	    """Decorator that reports every completed call of ``fn`` on stderr.

	    The wrapper forwards all positional and keyword arguments to ``fn``,
	    then writes the function object, the arguments and the return value
	    to ``sys.stderr`` and hands the return value back unchanged.  Nothing
	    is written when ``fn`` raises.
	    """
	    import sys

	    # `*a, **kw` restored: the flattened copy had lost the asterisks,
	    # which made keyword arguments impossible to pass through.
	    def new_fn(*a, **kw):
	        ret = fn(*a, **kw)
	        sys.stderr.write("Fn: %s; a=%s; kw=%s\nRet: %s\n"
	                         % (fn, a, kw, ret))
	        return ret

	    return new_fn


	# from reddit --------------------------------------------
	def to_csv(table):
	    """Render ``table`` as CSV text.

	    ``table`` is an iterable of rows and each row is an iterable of
	    strings.  Cells are joined with commas and rows with a newline; no
	    trailing newline is added.

	    A cell is wrapped in double quotes (with embedded double quotes
	    doubled) when it contains a comma, a double quote, a line feed or a
	    carriage return.  All other cells are emitted untouched.
	    """
	    def quote_commas(x):
	        # A bare double quote or carriage return inside an unquoted field
	        # yields malformed CSV, so those trigger quoting as well.
	        if ',' in x or '"' in x or '\n' in x or '\r' in x:
	            return u'"%s"' % x.replace('"', '""')
	        return x

	    return u"\n".join(u','.join(quote_commas(y) for y in x) for x in table)


	# from reddit --------------------------------------------
	def get_title(url):
	    """Fetch ``url`` and return the utf-8 encoded contents of its <title>.

	    Returns None when ``url`` is empty or does not start with 'http://',
	    when no usable <title> is found, or when fetching/parsing raises an
	    ordinary exception (best effort).
	    """
	    # Reject unusable URLs before paying for the third-party imports.
	    if not url or not url.startswith('http://'):
	        return None

	    from urllib2 import urlopen
	    from BeautifulSoup import BeautifulSoup

	    try:
	        # if we don't find it in the first kb of the resource, we
	        # probably won't find it
	        opener = urlopen(url, timeout=15)
	        try:
	            text = opener.read(1024)
	        finally:
	            # Release the connection even when the read fails.
	            opener.close()

	        bs = BeautifulSoup(text)
	        if not bs:
	            return None

	        title_bs = bs.first('title')

	        # NOTE(review): the `.children` test is kept exactly as written;
	        # confirm what it evaluates to for the installed BeautifulSoup
	        # version.
	        if not title_bs or title_bs.children:
	            return None

	        return title_bs.string.encode('utf-8')
	    except Exception:
	        # Best effort: any ordinary failure means "no title".
	        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
	        return None




	# from reddit --------------------------------------------
	def timeit(func):
	    """Run ``func()`` and return ``(run_time_in_seconds, result)``.

	    ``func`` is called with no arguments.  The elapsed time is the
	    difference of two ``time.time()`` readings taken around the call.
	    """
	    # Imported here because the module does not import `time` at top level.
	    import time

	    before = time.time()
	    res = func()
	    return (time.time() - before, res)

	# from reddit --------------------------------------------
	def lineno():
	    """Print the current time and the caller's line number, tab-separated.

	    The line number is read from the frame that called this function.
	    Nothing is returned.
	    """
	    import inspect
	    # Imported here because the module does not import `datetime` at top level.
	    from datetime import datetime

	    caller = inspect.currentframe().f_back
	    # A single parenthesised argument prints identically on Python 2 and 3.
	    print("%s\t%s" % (datetime.now(), caller.f_lineno))

	# from reddit --------------------------------------------
	def IteratorFilter(iterator, fn):
	    """Lazily yield the items of ``iterator`` for which ``fn(item)`` is true."""
	    # Nesting restored: the flattened copy had lost all block indentation.
	    for x in iterator:
	        if fn(x):
	            yield x

	# from reddit --------------------------------------------
	def UniqueIterator(iterator, key = lambda x: x):
	    """
	    Takes an iterator and returns an iterator that returns only the
	    first occurrence of each entry.  Two entries count as the same when
	    ``key`` returns equal values for them; the keys are remembered in a
	    set, so they must be hashable.

	    #[u for u in (UniqueIterator([0,2,4,3,1,5,1,3,5,1,9,5]))]
	    #[0, 2, 4, 3, 1, 5, 9]
	    """
	    so_far = set()

	    # Nesting restored: the flattened copy had lost all block indentation.
	    def no_dups(x):
	        k = key(x)
	        if k in so_far:
	            return False
	        so_far.add(k)
	        return True

	    return IteratorFilter(iterator, no_dups)

	#def yield_content(somedir):
	# "yield the lines of all python files within 'somedir'"
	#
	# files = [os.path.join(somedir,f) for f in os.listdir(somedir) if f.endswith(".py")]
	# for f in files:
	# for line in open(f).readlines():
	# yield line.rstrip("\r\n")
	#
	#
	#[u for u in UniqueIterator(IteratorFilter(yield_content('/home/stu/'),lambda x: x.find('import')==0))]
	#['import sys',
	# 'import sets',
	# 'import itertools',
	# 'import pprint',
	# 'import re',
	# 'import datetime',

	# from reddit --------------------------------------------
	def in_chunks(it, size=25):
	    """
	    Lazily split the iterable ``it`` into lists of at most ``size`` items.

	    Every yielded list has ``size`` items except possibly the last one,
	    which holds whatever is left over.  An empty iterable yields nothing.

	    for abc in in_chunks("ABCDEFGHIJKLMNOPQRSTUVWXYZ",size=3):
	        print(abc)

	    > ['A', 'B', 'C']
	    > ['D', 'E', 'F']
	    > ['G', 'H', 'I']
	    > ['J', 'K', 'L']
	    > ['M', 'N', 'O']
	    > ['P', 'Q', 'R']
	    > ['S', 'T', 'U']
	    > ['V', 'W', 'X']
	    > ['Y', 'Z']

	    """
	    chunk = []
	    # A plain for loop replaces the Python-2-only `it.next()` calls.
	    for item in it:
	        chunk.append(item)
	        if len(chunk) >= size:
	            yield chunk
	            chunk = []
	    # Emit the trailing partial chunk, if any.
	    if chunk:
	        yield chunk




	# http://stackoverflow.com/questions/101268/hidden-features-of-python/116391#116391
	class AttrDict(dict):
	    """
	    Dictionary whose items can also be read, written and deleted as
	    attributes.

	    person = AttrDict({'name': 'John Doe', 'age': 66})
	    print(person['name'])
	    print(person.name)

	    person.name = 'Frodo G'
	    print(person.name)

	    del person.age

	    print(person)
	    """

	    def __getattr__(self, name):
	        # Only reached when normal attribute lookup fails, so dict methods
	        # keep working.
	        if name in self:
	            return self[name]
	        raise AttributeError('%s not found' % name)

	    def __setattr__(self, name, value):
	        # Every attribute assignment is stored as a dictionary item.
	        self[name] = value

	    def __delattr__(self, name):
	        # Deleting a missing attribute raises KeyError from the dict delete.
	        del self[name]




	#>>> horses = [1, 2, 3, 4]
	#>>> races = itertools.permutations(horses)
	#>>> print(races)
	#<itertools.permutations object at 0xb754f1dc>
	#>>> print(list(itertools.permutations(horses)))
	#[(1, 2, 3, 4),
	# (1, 2, 4, 3),
	# (1, 3, 2, 4),
	# (1, 3, 4, 2),
	# (1, 4, 2, 3),
	# (1, 4, 3, 2),
	# (2, 1, 3, 4),
	# (2, 1, 4, 3),
	# (2, 3, 1, 4),
	# (2, 3, 4, 1),
	# (2, 4, 1, 3),
	# (2, 4, 3, 1),
	# (3, 1, 2, 4),
	# (3, 1, 4, 2),
	# (3, 2, 1, 4),
	# (3, 2, 4, 1),
	# (3, 4, 1, 2),
	# (3, 4, 2, 1),
	# (4, 1, 2, 3),
	# (4, 1, 3, 2),
	# (4, 2, 1, 3),
	# (4, 2, 3, 1),
	# (4, 3, 1, 2),
	# (4, 3, 2, 1)]
	#




	# from IPython --------------------------------------------
	def marquee(txt='',width=78,mark='*'):
	    """Return the input string centered in a 'marquee'.

	    With empty ``txt`` the result is ``mark`` repeated and cut to exactly
	    ``width`` characters.  Otherwise ``txt`` is surrounded by one space
	    and an equal number of ``mark`` repetitions on each side; when ``txt``
	    is too long for ``width`` no marks are added.

	    :Examples:

	        In [16]: marquee('A test',40)
	        Out[16]: '**************** A test ****************'

	        In [17]: marquee('A test',40,'-')
	        Out[17]: '---------------- A test ----------------'

	        In [18]: marquee('A test',40,' ')
	        Out[18]: '                 A test                 '

	    """
	    if not txt:
	        return (mark*width)[:width]
	    # Floor division keeps the repeat count an integer under true division.
	    nmark = (width-len(txt)-2)//len(mark)//2
	    if nmark < 0:
	        nmark = 0
	    marks = mark*nmark
	    return '%s %s %s' % (marks,txt,marks)


	# from IPython --------------------------------------------
	def page_file(fname,start = 0, pager_cmd = None):
	    """Page a file, using an optional pager command and starting line.

	    The pager command line is built with ``get_pager_cmd`` and
	    ``get_pager_start`` and run through ``xsys`` (all defined outside this
	    block; presumably IPython pager helpers -- verify against their
	    definitions).  When TERM is 'emacs' or 'dumb', or anything in that
	    step fails, the file is read and passed to ``page`` instead.  If that
	    fails too, a message is printed.
	    """
	    # Imported here because the module does not import `os` at top level.
	    import os

	    pager_cmd = get_pager_cmd(pager_cmd)
	    pager_cmd += ' ' + get_pager_start(pager_cmd,start)

	    try:
	        if os.environ['TERM'] in ['emacs','dumb']:
	            # Raised only to jump to the fallback below.
	            raise EnvironmentError
	        xsys(pager_cmd + ' ' + fname)
	    except Exception:
	        try:
	            if start > 0:
	                start -= 1
	            # Close the file promptly instead of leaking the handle.
	            with open(fname) as handle:
	                contents = handle.read()
	            page(contents,start)
	        except Exception:
	            # Same output as the Python 2 form: print 'Unable to show file',`fname`
	            print('Unable to show file ' + repr(fname))


	# from IPython --------------------------------------------
	def chop(seq,size):
	    """Chop a sequence into chunks of the given size.

	    Returns a list of slices ``seq[i:i+size]`` taken at offsets
	    0, size, 2*size, ...; the last slice may be shorter.  ``seq`` must
	    support ``len()`` and slicing.
	    """
	    # A list comprehension over range() returns a list on Python 2 and 3.
	    return [seq[i:i+size] for i in range(0,len(seq),size)]


	# from IPython --------------------------------------------
	def uniq_stable(elems):
	    """uniq_stable(elems) -> list

	    Return from an iterable, a list of all the unique elements in the
	    input, but maintaining the order in which they first appear.

	    Note: All elements in the input must be valid dictionary keys for
	    this routine to work, as it internally uses a dictionary for
	    efficiency reasons."""

	    unique = []
	    unique_dict = {}
	    # Nesting restored: the flattened copy had lost all block indentation.
	    for nn in elems:
	        if nn not in unique_dict:
	            unique.append(nn)
	            unique_dict[nn] = None
	    return unique


	# from IPython --------------------------------------------
	def flatten(seq):
	    """Flatten a list of lists (NOT recursive, only works for 2d lists)."""

	    # Body indentation restored; the flattened copy was not valid Python.
	    return [x for subseq in seq for x in subseq]

	# from IPython --------------------------------------------
	def get_slice(seq,start=0,stop=None,step=1):
	    """Get a slice of a sequence with variable step. Specify start,stop,step.

	    Returns a list of ``seq[i]`` for every index ``i`` in
	    ``range(start, stop, step)``.  ``stop`` defaults to ``len(seq)``.
	    """
	    if stop is None:
	        stop = len(seq)
	    # A list comprehension over range() returns a list on Python 2 and 3.
	    return [seq[i] for i in range(start,stop,step)]

	# from IPython --------------------------------------------
	def chop(seq,size):
	    """Chop a sequence into chunks of the given size.

	    Returns a list of slices ``seq[i:i+size]`` taken at offsets
	    0, size, 2*size, ...; the last slice may be shorter.  ``seq`` must
	    support ``len()`` and slicing.
	    """
	    # NOTE: this redefines the identical `chop` that appears earlier in
	    # this file.
	    # A list comprehension over range() returns a list on Python 2 and 3.
	    return [seq[i:i+size] for i in range(0,len(seq),size)]

	# from IPython --------------------------------------------
	try:
	    from itertools import izip_longest
	except ImportError:
	    # Python 3 provides the same function under the name zip_longest.
	    from itertools import zip_longest as izip_longest
	# Demonstration only: the value is discarded; the expected result is
	# shown in the comment that follows.
	[x for x in izip_longest(range(5),range(10),range(15),fillvalue=None)]
	#[(0, 0, 0),
	# (1, 1, 1),
	# (2, 2, 2),
	# (3, 3, 3),
	# (4, 4, 4),
	# (None, 5, 5),
	# (None, 6, 6),
	# (None, 7, 7),
	# (None, 8, 8),
	# (None, 9, 9),
	# (None, None, 10),
	# (None, None, 11),
	# (None, None, 12),
	# (None, None, 13),
	# (None, None, 14)]


	# from IPython --------------------------------------------
	class countCalls(object):
	    """ decorator replaces a function with a "countCalls" instance
	    which behaves like the original function, but keeps track of calls

	    >>> @countCalls
	    ... def doNothing():
	    ...     pass
	    >>> doNothing()
	    >>> doNothing()
	    >>> print(doNothing.timesCalled)
	    2
	    """

	    def __init__(self, functionToTrack):
	        self.functionToTrack = functionToTrack
	        self.timesCalled = 0

	    # `*args, **kwargs` restored: the flattened copy had lost the
	    # asterisks, which broke zero-argument and keyword-argument calls.
	    def __call__(self, *args, **kwargs):
	        self.timesCalled += 1
	        return self.functionToTrack(*args, **kwargs)