# runsun/find.py

## find.py
# Module metadata: __description__ and __author__ are joined into the argparse
# description text, and __version__ is what the --version option prints.
__description__ = "Find lines where a word is defined in a folder."
__version__= "171115-1"
__author__= "Runsun Pan"

'''
>>> py find.py -h
usage: find.py [-h] [--version] [-f folder] [-p prefix] [-a activeModule]
               [-i includePrefix] [-x fileExt]
               word

Find lines where a word is defined in a folder. Runsun Pan

positional arguments:
  word              Target word or regex pattern string

optional arguments:
  -h, --help        show this help message and exit
  --version         Shows version and exits.
  -f folder         Target folder
  -p prefix         prefix. Could be a str in regexp form.
                    Default='function|module'
  -a activeModule   Search for modules that are set to run at this moment by
                    matching 'xxx(...);'. Useful for complex lib.
  -i includePrefix  Int. 1 = Include prefix defined in -p
  -x fileExt        Specifiy file extension like doc, py ... etc. Default:scad


Note: previous version was wrongfully dated as 2017.10.5
      (should have been 2017.10.4)

Current ver 2017.10.4-2:
  -- more structural and clearer display
  -- output is grouped by file

ver 2017.10.4:
This version (2017.10.4) added a couple of options: prefix, file
extension inclusion/exclusion, and error handling. The target word to
be searched can be the string representation of a Python regexp pattern.

Example:

>>> py find.py getPlane -f scadx -i 0
+================================================================================
| Matches:
| scadx_geometry.scad:
|   #4769: function getPlaneByNormalLine(pq)=
|   #4780:     module getPlaneByNormalLine_demo()
|   #4783:         pl = getPlaneByNormalLine( pq );
|   #5362:     ? let( int= projPt( pt, getPlaneByNormalLine(pl)) )
|   #5365:     ? let( int= projPt( pt, getPlaneByNormalLine( [ORIGIN, pl])) )
|   #5400:     pl = expandPts( getPlaneByNormalLine(pq), 1);
|   #5427:     pl = expandPts( getPlaneByNormalLine( [ORIGIN, pt2]), 1);
| Untitled.scad:
|   #13: //getPlaneByNormalLine_demo();
+================================================================================
::: Matching re.compile('getPlane') in folder 'scadx':
::: Arguments Namespace(f='scadx', i=0, p='function|module', word='getPlane', x='scad')

Match 8 times in 2 files (see above for details):

scadx/scadx_geometry.scad: 7
scadx/Untitled.scad: 1

With 1 file errors:
(0)'scad_string.scad': UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 327: character maps to <undefined>


'''

import os, sys, re, argparse, traceback

# Files with the following extensions are excluded. The list is only
# consulted by isValidFile() when the -x option is empty.
EXCLUDEFILES=["png","gif", "bmp", "lnk"]


def _parse_bool_flag(text):
  ''' Convert a command-line string into a bool for the -a option.

      argparse calls `type` on the raw option string, and both bool("0")
      and bool("False") are True because any non-empty string is truthy,
      so type=bool could never switch the option off explicitly.

      text: raw option value such as "1", "0", "true" or "no"
            (case-insensitive, surrounding blanks ignored).
      Returns True or False; the empty string counts as False.
      Raises argparse.ArgumentTypeError for anything else, which argparse
      reports as a regular usage error.
  '''
  value = text.strip().lower()
  if value in ("1", "true", "t", "yes", "y", "on"):
    return True
  if value in ("", "0", "false", "f", "no", "n", "off"):
    return False
  raise argparse.ArgumentTypeError(
    "expected a boolean such as 1/0 or true/false, got %r" % text)


# Argument setup and parse. Parsing happens at import time because the
# rest of the script reads `args` at module level.
argp = argparse.ArgumentParser(
  prog=__file__,
  description=__description__ + "\n" + __author__,
)
argp.add_argument(
  '--version', action='version',
  version='%(prog)s ' + __version__,
  help='Shows version and exits.',
)
argp.add_argument(
  "word", metavar="word", type=str,
  help="Target word or regex pattern string",
)
argp.add_argument(
  "-f", metavar="folder", type=str, default=".",
  help="Target folder",
)
argp.add_argument(
  "-p", metavar="prefix", type=str, default="function|module",
  help="prefix. Could be a str in regexp form. Default='function|module'",
)
argp.add_argument(
  "-a", metavar="activeModule", type=_parse_bool_flag,
  default=0,  # kept as 0 so the printed Namespace is unchanged when -a is absent
  help="Search for modules that are set to run at this moment by matching 'xxx(...);'. Useful for complex lib.",
)
argp.add_argument(
  "-i", metavar="includePrefix", type=int,
  default=1,
  help="Int. 1 = Include prefix defined in -p",
)
argp.add_argument(
  '-x', metavar="fileExt", type=str, default="scad",
  help="Specifiy file extension like doc, py ... etc. Default:scad",
)

args= argp.parse_args()


foldername = args.f
word = args.word

# Pick the regex source according to the options:
#   -a set : ignore `word` and match top-level call statements "name(...);"
#   -i set : anchor at line start and require one of the -p prefixes
#            followed by a single space before `word`
#   else   : search for `word` anywhere in the line
if args.a:
  # Raw string: keeps the backslashes literal without relying on Python
  # passing unknown escapes such as "\w" or "\(" through unchanged.
  ptn_str = r"^\w*\([\w=\.\-,\[\] ]*\);"
elif args.i:
  ptn_str = "^(" + args.p + ") " + word
else:
  ptn_str = word

try:
  pattern = re.compile(ptn_str)
except re.error as err:
  # `word` and -p come straight from the command line; report a bad regex
  # as a one-line message instead of a traceback.
  sys.exit("find.py: invalid regular expression %r: %s" % (ptn_str, err))

# Top of the result box; the scan below prints one "| ..." line per match.
print( '+'+"===="*20)
print('| Matches:')

mcount=0      # total number of matching lines
fcount=0      # number of files that passed the extension filter
errors = []   # one "'<file>': <exception line>" string per unreadable file
files = {}    # full path -> number of matching lines in that file

def isValidFile( fn ):
  ''' Decide if a file is valid based on its extension.

      fn: file name (no directory part is required).

      When the -x option is non-empty, only names ending in ".<ext>" are
      valid. Otherwise every name is valid except those ending in one of
      the EXCLUDEFILES extensions. The comparison is case-sensitive.

      Returns True if the file should be searched, else False.
  '''
  if args.x:
    return fn.endswith("." + args.x)
  # str.endswith accepts a tuple of suffixes; an empty tuple never matches,
  # so an empty EXCLUDEFILES list accepts every file.
  return not fn.endswith(tuple("." + ext for ext in EXCLUDEFILES))


# Scan every regular file directly inside `foldername` (no recursion) whose
# name passes isValidFile(), printing each matching line grouped by file.
for f in os.listdir( foldername ):
  fullpath = os.path.join(foldername, f)
  if not (os.path.isfile(fullpath) and isValidFile(f)):
    continue

  fcount = fcount+1
  thisFileHasMatches = False
  try:
    # `with` closes the handle even when decoding fails part-way through.
    with open(fullpath, "r") as fh:
      lines = fh.read().split("\n")

    # NOTE(review): `i` is zero-based, so the first line of a file is
    # reported as #0 -- confirm whether 1-based numbers were intended.
    for i,ln in enumerate(lines):
      if not pattern.search(ln):
        continue

      if not thisFileHasMatches:
        # Print the file name once, just before its first match.
        print("| "+f+":")
        thisFileHasMatches = True

      mcount = mcount+1
      files[fullpath] = files.get(fullpath, 0) + 1

      if args.i==0:
        # Free search: keep the original indentation of the line.
        print( "|   #%s: %s "%(i, ln))
      else:
        # Prefix search: the match is at line start, so trim the line.
        print( "|  #%s: %s "%(i, ln.strip()) )
  except Exception:
    # Best effort: record the failure (typically a decode error) and go on
    # with the next file. Catching Exception rather than everything lets
    # Ctrl-C and sys.exit() still stop the scan.
    # [-2] is the last non-empty traceback line, "<ExcType>: <message>".
    errors.append(
      "'%s': %s"%(f,traceback.format_exc().split("\n")[-2])
    )

# Close the result box, then summarise: the pattern and arguments used,
# total matches, per-file match counts, and the files that could not be read.
print('+' + '===='*20)
print( "::: Matching '%s' in folder '%s':" % (ptn_str, foldername) )
print( "::: Arguments %s" % args )
print(
  '\nMatch %s times in %s files (see above for details):\n'
  % (mcount, len(files))
)
# One "<path>: <count>" line per file that had at least one match.
print( '\n'.join( '%s: %s' % (path, hits) for path, hits in files.items() ) )
print( '\nWith %s file errors:' % len(errors) )
# Errors are numbered from 0 in the order they were encountered.
print( '\n'.join( "(%i)%s" % (idx, msg) for idx, msg in enumerate(errors) ) )
	# NOTE: a second, whitespace-mangled copy of this whole script used to
	# follow here. It was removed because its stray leading indentation made
	# the file an IndentationError, its regex and string literals had been
	# corrupted (markdown-escaped "\|", and "\w*" reduced to "\w"), and a
	# repaired copy would only have run the same scan a second time.