denis-bz/Print_gen.md

## pr.py
#!/usr/bin/python
""" pr( ... x= y= ... ), probj()  for testing """
# must be thousands such
# use with https://docs.python.org/2.7/library/logging.html ?

from __future__ import division
import numpy as np

__version__ = "2016-03-06 mar  denis"

#...............................................................................
def pr( *args, **kwargs ):
    """ pr( "stage 42:", ntop=ntop, df=df ... )
        Print a blank line, then probj() each positional arg in call order,
        then each keyword arg labelled with its name.
        Cheap enough to apply liberally.
    """
    print( "" )
    for obj in args:
        probj( obj )
        # keyword args are visited in sorted-name order, not the caller's order
    for name in sorted( kwargs ):
        probj( kwargs[name], name )

#...............................................................................
def probj( x, nm="" ):
    """ Print one object to stdout, compactly; the form depends on its type:
        None, strings, numpy string arrays: the value itself
        scalars, 0-d arrays e.g. np.array( 3 ): "%.6g",
            or plain "%s" if x is not a real number
        list, tuple: 1 summary line, e.g. "-- nm: list len 42"
        dict: summary line, then up to 10 keys, smallest first
        anything with .head() (pandas DataFrame, Series ...): summary line,
            then all rows if <= 10, else head( 3 ) ... tail( 3 ),
            laid out by the user's pd.set_option
        anything else with a dtype, or whose .values has one (numpy ndarray):
            summary line, then the array, laid out by the user's
            np.set_printoptions

        x: the object to print
        nm: optional label, printed first as "-- nm:"
        The summary line is: label, x.name if any, class name, shape or len.
    """
        # pieces of the first output line, joined with single blanks;
        # single-argument print( ... ) behaves the same in Python 2 and 3
    head = []
    if nm:
        head.append( "-- %s:" % nm )

    if x is None  or _isstr( x ):
        print( " ".join( head + ["%s" % (x,)] ))
        return
    if np.isscalar( x ) \
    or (hasattr( x, "ndim" ) and x.ndim == 0):  # np.array( 3 )
        try:
            text = "%.6g" % x
        except (TypeError, ValueError):
                # not a real number: complex, 0-d object array np.array( None ) ...
            text = "%s" % (x,)
        print( " ".join( head + [text] ))
        return

    if hasattr( x, "name" )  and x.name is not None:  # pandas
        head.append( "%s" % (x.name,) )

        # type / classname, shape / len --
    t = getattr( x, "__class__", type(x) )  # old-style classes have no __class__
    head.append( t.__name__ )
    if hasattr( x, "shape" ):
        head.append( "%s" % (x.shape,) )
        n = x.shape[0]
    else:
        try:
            n = len(x)
            head.append( "len %d" % n )
        except (TypeError, AttributeError):  # len() of unsized object
            n = np.nan  # nan <= 10 is False, so big-object branches are taken
    print( " ".join( head ))

    if isinstance( x, (tuple, list) ):
        return
    if isinstance( x, dict ):
            # sort first, then cut: shows the 10 smallest keys, not a random 10
        try:
            keys = sorted( x )
        except TypeError:  # mixed-type keys cannot be ordered in Python 3
            keys = sorted( x, key=repr )
        print( "  keys: %s" % (keys[:10],) )
        return

        # pandas DataFrame, Series etc.
        # (to print as ndarray, pr( df.values ))
    if hasattr( x, "head" ):
        if n <= 10:
            print( x.head( n ))  # with user's pd.set_option( max_rows max_cols ... )
        else:
            print( x.head( 3 ))
            print( "..." )
            print( x.tail( 3 ))
        print( "" )
        return

    if hasattr( x, "values" ):
        x = x.values
    if hasattr( x, "dtype" ):  # np array
        print( x )  # with user's np.set_printoptions
    print( "" )


def _isstr( x ):
    """ basestring or np.array( "str" ) """
    return isinstance( x, basestring ) \
    or np.issubdtype( getattr( x, "dtype", "i4"), np.string_ )  # ?


#...............................................................................
if __name__ == "__main__":
    from collections import namedtuple

    # demo: run pr() over one sample of each kind of object
    np.set_printoptions(
        threshold=100, edgeitems=10, linewidth=140,
        formatter={ "float": lambda v: "%.2g" % v })  # float arrays %.2g
    # pd.set_option( "display.width", 140, "display.precision", 2 )

    class C:
        pass
    c = C()
    Namedtuple = namedtuple( "Namedtuple", "x y" )

    # pr() sorts keyword names itself, so the order here does not matter
    samples = dict(
        s="string",
        none=None,
        pi=np.pi,
        alist=[1, 2],
        adict={ 1:2, 3:4 },
        arraynone=[None],
        array0d=np.array( 3 ),
        arraypi=np.array( np.pi ),
        arraystr=np.array("string"),
        array2=np.array([ "string", 3 ]),
        eye=np.eye( 3 ) * np.pi,
        C=C,
        c=c,
        Namedtuple=Namedtuple,
        anamedtuple=Namedtuple( 1, 2 ),
    )
    pr( **samples )


## Print_gen.md

      
    Raw
  

              Print_gen.md
            
          
Purpose: generate `print ...` statements that help you follow code as it runs.

`print_gen.py` expands each line of the form

    p var = expr

into

    print( '''\n>> var = expr''' )
    var = expr
    pr( var )

`pr()` can be plain `pr = print`, or a custom printer such as `pr.py`.
Example: `z-groupby.p` --> `z-groupby.py`.

Keywords: software testing, print statements, generate, preprocessor, python

  
## print_gen.py
#!/usr/bin/env python
""" expand lines "p var = expr" -->
        print( '''>> var = expr''' )
        var = expr
        pr( var )

    Purpose: generate "print ..." to help follow code.
    `pr()` can be `pr = print`, or a custom printer such as `pr.py` .
"""
# odd, useful ? 4 possibilities


import re  # http://docs.python.org/2.7/library/re.html

__version__ = "2016-03-06 mar  denis"


p_var_eq_expr_pat = re.compile(
    r"(\s*)p \s+ (.+?) \s+ = \s+ (.*)", re.X )
    #  indent p    lhs       =     anything
    # re.X: blanks inside the pattern are ignored.
    # lhs is non-greedy, so the line is split at its FIRST " = ";
    # a greedy lhs would swallow later ones, as in  p x = f( a = 1 )

#...............................................................................
def pline( line ):
    """ Expand one line "p var = expr" into 3 lines: print, var = expr, pr().

        line: one source line, usually ending with a newline
        Returns the line unchanged if it is not of the form
            <indent>p <var> = <expr>
        with blanks around "=" (so "p = 0" and "p, q = 1, 2" pass through);
        else the 3 generated lines, each with the original indent
        and each ending with a newline.
        Limitation: an expr containing ''' breaks the generated print.
    """
    m = p_var_eq_expr_pat.match( line )
    if not m:
        return line
    space, var, expr = m.groups()
        # "\\n" puts a literal backslash-n into the generated source,
        # so the generated print starts with a blank line
    return (
        "%(space)sprint( '''\\n>> %(var)s = %(expr)s''' )\n"
        "%(space)s%(var)s = %(expr)s\n"
        "%(space)spr( %(var)s )\n"
        % dict( space=space, var=var, expr=expr ))

#...............................................................................
def plines( lines ):
    """ Expand all "p var = expr" lines and write the result to stdout.

        plines( filename )
        plines( lines list or iter )  e.g. f.readlines()

        lines: a filename (string), or an iterable of lines.
        Each line is passed through pline() and written exactly as returned,
        so input lines should carry their own newlines.
    """
    import sys

    try:
        string_types = basestring  # Python 2: str and unicode
    except NameError:
        string_types = str  # Python 3

    def expand( source ):
            # write(), not "print x ,": that adds a blank after any line
            # lacking a trailing newline
        write = sys.stdout.write
        for line in source:
            write( pline( line ))

    if isinstance( lines, string_types ):
        with open( lines, 'r' ) as f:
            expand( f )
    else:
        expand( lines )

#...............................................................................
if __name__ == "__main__":
    import sys

    if not sys.argv[1:]:
        # no files given: expand a small built-in demo
        lines = """
p var = expr
    p  var  =  expr

p = 0
p, q = 1, 2
"""
        plines([ text + "\n"
                for text in lines.split("\n") ])
    else:
        # expand each file named on the command line, in order, to stdout
        for filename in sys.argv[1:]:
            plines( filename )


## z-groupby.p
""" simple examples of pandas groupby """
# p-py: this.p -> this.py

from __future__ import division
import sys
import numpy as np
import pandas as pd
from bz.etc.pr import pr

# This file is input for print_gen.py, not plain Python:
# every line "p var = expr" below is expanded to
#     print( '''\n>> var = expr''' );  var = expr;  pr( var )
# all other lines are passed through unchanged.

# print layout for numpy arrays and pandas frames, used by pr()
np.set_printoptions( threshold=20, edgeitems=10, linewidth=140,
        formatter = dict( float = lambda x: "%.2g" % x ))  # float arrays %.2g
pd.set_option( "display.width", 140,
        "display.max_rows", 20,
        "display.precision", 2 )
print "\n", 80 * "-"


# default params
n = 10
seed = 0

    # to change these params in sh or ipython, run this.py  a=1  b=None  c=[3,4] ...
    # NOTE(review): exec() runs each command-line arg as Python code --
    # fine for interactive experiments, never feed it untrusted input
for arg in sys.argv[1:]:
    exec( arg )

np.random.seed( seed )

#...............................................................................
# n random rows; the order of the 3 draws fixes the data for a given seed
A = np.random.randint( 0, 2+1, n )
B = np.random.exponential( size=n )
X = np.array([ "x", "y", "z" ]) [np.random.randint( 0, 2+1, n )]

p df = pd.DataFrame( dict( A=A, B=B, X=X )) .sort_values( by=["A", "B"] )

# group the rows by column A, then look at the groups in several ways
p Agr = df.groupby( "A" )  # pr() head(3) confusing
print "\ngroups:", Agr.groups
print "\nindices:", Agr.indices
p Acount = Agr.count()
p Ahead = Agr.head(1)

# the same, for column B only
p ABcount = Agr .B .count()
p ABsum = Agr .B .sum()

p Atrans = Agr .transform( np.sum )  # ?
p Btrans = Agr .B .transform( np.sum )
p agg = Agr.agg( np.sum )  # num cols only


## z-groupby.py
""" simple examples of pandas groupby """
# p-py: this.p -> this.py

from __future__ import division
import sys
import numpy as np
import pandas as pd
from bz.etc.pr import pr

# NOTE(review): this file looks generated from z-groupby.p by print_gen.py
# (see "p-py" above): each print / assignment / pr() triple below comes from
# one "p var = expr" line -- presumably edit the .p file and regenerate.

# print layout for numpy arrays and pandas frames, used by pr()
np.set_printoptions( threshold=20, edgeitems=10, linewidth=140,
        formatter = dict( float = lambda x: "%.2g" % x ))  # float arrays %.2g
pd.set_option( "display.width", 140,
        "display.max_rows", 20,
        "display.precision", 2 )
print "\n", 80 * "-"


# default params
n = 10
seed = 0

    # to change these params in sh or ipython, run this.py  a=1  b=None  c=[3,4] ...
    # NOTE(review): exec() runs each command-line arg as Python code --
    # fine for interactive experiments, never feed it untrusted input
for arg in sys.argv[1:]:
    exec( arg )

np.random.seed( seed )

#...............................................................................
# n random rows; the order of the 3 draws fixes the data for a given seed
A = np.random.randint( 0, 2+1, n )
B = np.random.exponential( size=n )
X = np.array([ "x", "y", "z" ]) [np.random.randint( 0, 2+1, n )]

print( '''\n>> df = pd.DataFrame( dict( A=A, B=B, X=X )) .sort_values( by=["A", "B"] )''' )
df = pd.DataFrame( dict( A=A, B=B, X=X )) .sort_values( by=["A", "B"] )
pr( df )

# group the rows by column A, then look at the groups in several ways
print( '''\n>> Agr = df.groupby( "A" )  # pr() head(3) confusing''' )
Agr = df.groupby( "A" )  # pr() head(3) confusing
pr( Agr )
print "\ngroups:", Agr.groups
print "\nindices:", Agr.indices
print( '''\n>> Acount = Agr.count()''' )
Acount = Agr.count()
pr( Acount )
print( '''\n>> Ahead = Agr.head(1)''' )
Ahead = Agr.head(1)
pr( Ahead )

# the same, for column B only
print( '''\n>> ABcount = Agr .B .count()''' )
ABcount = Agr .B .count()
pr( ABcount )
print( '''\n>> ABsum = Agr .B .sum()''' )
ABsum = Agr .B .sum()
pr( ABsum )

print( '''\n>> Atrans = Agr .transform( np.sum )  # ?''' )
Atrans = Agr .transform( np.sum )  # ?
pr( Atrans )
print( '''\n>> Btrans = Agr .B .transform( np.sum )''' )
Btrans = Agr .B .transform( np.sum )
pr( Btrans )
print( '''\n>> agg = Agr.agg( np.sum )  # num cols only''' )
agg = Agr.agg( np.sum )  # num cols only
pr( agg )
	#!/usr/bin/python
	""" pr( ... x= y= ... ), probj() for testing """
	# must be thousands such
	# use with https://docs.python.org/2.7/library/logging.html ?

	from __future__ import division
	import numpy as np

	__version__ = "2016-03-06 mar denis"

	#...............................................................................
	def pr( *args, **kwargs ):
	    """ pr( "stage 42:", ntop=ntop, df=df ... )
	        Print a blank line, then probj() each positional arg in call order,
	        then each keyword arg labelled with its name.
	        Cheap enough to apply liberally.
	    """
	    print( "" )
	    for obj in args:
	        probj( obj )
	        # keyword args are visited in sorted-name order, not the caller's order
	    for name in sorted( kwargs ):
	        probj( kwargs[name], name )

	#...............................................................................
	def probj( x, nm="" ):
	    """ Print one object to stdout, compactly; the form depends on its type:
	        None, strings, numpy string arrays: the value itself
	        scalars, 0-d arrays e.g. np.array( 3 ): "%.6g",
	            or plain "%s" if x is not a real number
	        list, tuple: 1 summary line, e.g. "-- nm: list len 42"
	        dict: summary line, then up to 10 keys, smallest first
	        anything with .head() (pandas DataFrame, Series ...): summary line,
	            then all rows if <= 10, else head( 3 ) ... tail( 3 ),
	            laid out by the user's pd.set_option
	        anything else with a dtype, or whose .values has one (numpy ndarray):
	            summary line, then the array, laid out by the user's
	            np.set_printoptions

	        x: the object to print
	        nm: optional label, printed first as "-- nm:"
	        The summary line is: label, x.name if any, class name, shape or len.
	    """
	        # pieces of the first output line, joined with single blanks;
	        # single-argument print( ... ) behaves the same in Python 2 and 3
	    head = []
	    if nm:
	        head.append( "-- %s:" % nm )

	    if x is None  or _isstr( x ):
	        print( " ".join( head + ["%s" % (x,)] ))
	        return
	    if np.isscalar( x ) \
	    or (hasattr( x, "ndim" ) and x.ndim == 0):  # np.array( 3 )
	        try:
	            text = "%.6g" % x
	        except (TypeError, ValueError):
	                # not a real number: complex, 0-d object array np.array( None ) ...
	            text = "%s" % (x,)
	        print( " ".join( head + [text] ))
	        return

	    if hasattr( x, "name" )  and x.name is not None:  # pandas
	        head.append( "%s" % (x.name,) )

	        # type / classname, shape / len --
	    t = getattr( x, "__class__", type(x) )  # old-style classes have no __class__
	    head.append( t.__name__ )
	    if hasattr( x, "shape" ):
	        head.append( "%s" % (x.shape,) )
	        n = x.shape[0]
	    else:
	        try:
	            n = len(x)
	            head.append( "len %d" % n )
	        except (TypeError, AttributeError):  # len() of unsized object
	            n = np.nan  # nan <= 10 is False, so big-object branches are taken
	    print( " ".join( head ))

	    if isinstance( x, (tuple, list) ):
	        return
	    if isinstance( x, dict ):
	            # sort first, then cut: shows the 10 smallest keys, not a random 10
	        try:
	            keys = sorted( x )
	        except TypeError:  # mixed-type keys cannot be ordered in Python 3
	            keys = sorted( x, key=repr )
	        print( "  keys: %s" % (keys[:10],) )
	        return

	        # pandas DataFrame, Series etc.
	        # (to print as ndarray, pr( df.values ))
	    if hasattr( x, "head" ):
	        if n <= 10:
	            print( x.head( n ))  # with user's pd.set_option( max_rows max_cols ... )
	        else:
	            print( x.head( 3 ))
	            print( "..." )
	            print( x.tail( 3 ))
	        print( "" )
	        return

	    if hasattr( x, "values" ):
	        x = x.values
	    if hasattr( x, "dtype" ):  # np array
	        print( x )  # with user's np.set_printoptions
	    print( "" )


	def _isstr( x ):
	""" basestring or np.array( "str" ) """
	return isinstance( x, basestring ) \
	or np.issubdtype( getattr( x, "dtype", "i4"), np.string_ ) # ?


	#...............................................................................
	if __name__ == "__main__":
	    from collections import namedtuple

	    # demo: run pr() over one sample of each kind of object
	    np.set_printoptions( threshold=100, edgeitems=10, linewidth=140,
	        formatter = dict( float = lambda x: "%.2g" % x ))  # float arrays %.2g
	    # pd.set_option( "display.width", 140, "display.precision", 2 )

	    class C:
	        pass
	    c = C()
	    Namedtuple = namedtuple( "Namedtuple", "x y" )

	    # pr() prints keyword args in sorted-name order
	    pr(
	        adict   = { 1:2, 3:4 },
	        alist   = [1, 2],
	        array0d = np.array( 3 ),
	        none    = None,
	        arraynone = [None],
	        arraystr = np.array("string"),
	        array2  = np.array([ "string", 3 ]),
	        eye     = np.eye( 3 ) * np.pi,
	        pi      = np.pi,
	        arraypi = np.array( np.pi ),
	        s       = "string",

	        C=C,
	        c=c,

	        Namedtuple = Namedtuple,
	        anamedtuple = Namedtuple( 1, 2 ),
	    )
	#!/usr/bin/env python
	""" expand lines "p var = expr" -->
	print( '''>> var = expr''' )
	var = expr
	pr( var )

	Purpose: generate "print ..." to help follow code.
	`pr()` can be `pr = print`, or a custom printer such as `pr.py` .
	"""
	# odd, useful ? 4 possibilities


	import re # http://docs.python.org/2.7/library/re.html

	__version__ = "2016-03-06 mar denis"


	p_var_eq_expr_pat = re.compile(
	    r"(\s*)p \s+ (.+?) \s+ = \s+ (.*)", re.X )
	    #  indent p    lhs       =     anything
	    # re.X: blanks inside the pattern are ignored.
	    # lhs is non-greedy, so the line is split at its FIRST " = ";
	    # a greedy lhs would swallow later ones, as in  p x = f( a = 1 )

	#...............................................................................
	def pline( line ):
	    """ Expand one line "p var = expr" into 3 lines: print, var = expr, pr().

	        line: one source line, usually ending with a newline
	        Returns the line unchanged if it is not of the form
	            <indent>p <var> = <expr>
	        with blanks around "=" (so "p = 0" and "p, q = 1, 2" pass through);
	        else the 3 generated lines, each with the original indent
	        and each ending with a newline.
	        Limitation: an expr containing ''' breaks the generated print.
	    """
	    m = p_var_eq_expr_pat.match( line )
	    if not m:
	        return line
	    space, var, expr = m.groups()
	        # "\\n" puts a literal backslash-n into the generated source,
	        # so the generated print starts with a blank line
	    return (
	        "%(space)sprint( '''\\n>> %(var)s = %(expr)s''' )\n"
	        "%(space)s%(var)s = %(expr)s\n"
	        "%(space)spr( %(var)s )\n"
	        % dict( space=space, var=var, expr=expr ))

	#...............................................................................
	def plines( lines ):
	    """ Expand all "p var = expr" lines and write the result to stdout.

	        plines( filename )
	        plines( lines list or iter )  e.g. f.readlines()

	        lines: a filename (string), or an iterable of lines.
	        Each line is passed through pline() and written exactly as returned,
	        so input lines should carry their own newlines.
	    """
	    import sys

	    try:
	        string_types = basestring  # Python 2: str and unicode
	    except NameError:
	        string_types = str  # Python 3

	    def expand( source ):
	            # write(), not "print x ,": that adds a blank after any line
	            # lacking a trailing newline
	        write = sys.stdout.write
	        for line in source:
	            write( pline( line ))

	    if isinstance( lines, string_types ):
	        with open( lines, 'r' ) as f:
	            expand( f )
	    else:
	        expand( lines )

	#...............................................................................
	if __name__ == "__main__":
	    import sys

	    if len(sys.argv) > 1:
	        # expand each file named on the command line, in order, to stdout
	        for filename in sys.argv[1:]:
	            plines( filename )
	    else:
	        # no files given: expand a small built-in demo
	        lines = """
	p var = expr
	    p  var  =  expr

	p = 0
	p, q = 1, 2
	"""
	        plines([ line + "\n"
	                for line in lines.split("\n") ])
	""" simple examples of pandas groupby """
	# p-py: this.p -> this.py

	from __future__ import division
	import sys
	import numpy as np
	import pandas as pd
	from bz.etc.pr import pr

	np.set_printoptions( threshold=20, edgeitems=10, linewidth=140,
	formatter = dict( float = lambda x: "%.2g" % x )) # float arrays %.2g
	pd.set_option( "display.width", 140,
	"display.max_rows", 20,
	"display.precision", 2 )
	print "\n", 80 * "-"


	n = 10
	seed = 0

	# to change these params in sh or ipython, run this.py a=1 b=None c=[3,4] ...
	for arg in sys.argv[1:]:
	exec( arg )

	np.random.seed( seed )

	#...............................................................................
	A = np.random.randint( 0, 2+1, n )
	B = np.random.exponential( size=n )
	X = np.array([ "x", "y", "z" ]) [np.random.randint( 0, 2+1, n )]

	p df = pd.DataFrame( dict( A=A, B=B, X=X )) .sort_values( by=["A", "B"] )

	p Agr = df.groupby( "A" ) # pr() head(3) confusing
	print "\ngroups:", Agr.groups
	print "\nindices:", Agr.indices
	p Acount = Agr.count()
	p Ahead = Agr.head(1)

	p ABcount = Agr .B .count()
	p ABsum = Agr .B .sum()

	p Atrans = Agr .transform( np.sum ) # ?
	p Btrans = Agr .B .transform( np.sum )
	p agg = Agr.agg( np.sum ) # num cols only