Last active
November 16, 2017 06:12
-
-
Save runsun/10d5a3d264ab73c75ca0 to your computer and use it in GitHub Desktop.
Find lines where a word is located in all files under a folder.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script metadata; the argparse setup below reuses these values for the
# parser description and the --version output.
__description__ = "Find lines where a word is defined in a folder."
__version__ = "171115-1"
__author__ = "Runsun Pan"
''' | |
>>> py find.py -h | |
usage: find.py [-h] [--version] [-f folder] [-p prefix] [-a activeModule] | |
[-i includePrefix] [-x fileExt] | |
word | |
Find lines where a word is defined in a folder. Runsun Pan | |
positional arguments: | |
word Target word or regex pattern string | |
optional arguments: | |
-h, --help show this help message and exit | |
--version Shows version and exits. | |
-f folder Target folder | |
-p prefix prefix. Could be a str in regexp form. | |
Default='function|module' | |
-a activeModule Search for modules that are set to run at this moment by | |
matching 'xxx(...);'. Useful for complex lib. | |
-i includePrefix Int. 1 = Include prefix defined in -p | |
-x fileExt Specifiy file extension like doc, py ... etc. Default:scad | |
Note: previous version was wrongfully dated as 2017.10.5 | |
(should have been 2017.10.4) | |
Current ver 2017.10.4-2: | |
-- more structural and clearer display | |
-- output is grouped by file | |
ver 2017.10.4: | |
This new version (2017.10.4) added a couple of options: prefix, file
extension inclusion/exclusion, error handling. The target word to
be searched can be a string representation of a Python regexp pattern.
Example: | |
>>> py find.py getPlane -f scadx -i 0 | |
+================================================================================ | |
| Matches: | |
| scadx_geometry.scad: | |
| #4769: function getPlaneByNormalLine(pq)= | |
| #4780: module getPlaneByNormalLine_demo() | |
| #4783: pl = getPlaneByNormalLine( pq ); | |
| #5362: ? let( int= projPt( pt, getPlaneByNormalLine(pl)) ) | |
| #5365: ? let( int= projPt( pt, getPlaneByNormalLine( [ORIGIN, pl])) ) | |
| #5400: pl = expandPts( getPlaneByNormalLine(pq), 1); | |
| #5427: pl = expandPts( getPlaneByNormalLine( [ORIGIN, pt2]), 1); | |
| Untitled.scad: | |
| #13: //getPlaneByNormalLine_demo(); | |
+================================================================================ | |
::: Matching re.compile('getPlane') in folder 'scadx': | |
::: Arguments Namespace(f='scadx', i=0, p='function|module', word='getPlane', x='scad') | |
Match 8 times in 2 files (see above for details): | |
scadx/scadx_geometry.scad: 7 | |
scadx/Untitled.scad: 1 | |
With 1 file errors: | |
(0)'scad_string.scad': UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 327: character maps to <undefined> | |
''' | |
import os, sys, re, argparse, traceback | |
# File extensions (without the leading dot) that are skipped when no
# explicit extension filter (-x) is in effect.
EXCLUDEFILES = [
    "png",
    "gif",
    "bmp",
    "lnk",
]
# Command-line interface: build the parser and parse sys.argv into `args`.


def _str2bool(text):
    """Convert a command-line token into a real boolean.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so
    every non-empty value -- including "0" and "false" -- becomes True.
    This converter treats the usual "off" spellings as False and, to stay
    compatible with earlier invocations, anything else as True.
    """
    return str(text).strip().lower() not in ("", "0", "false", "no", "off", "n", "f")


argp = argparse.ArgumentParser(
    prog=__file__,
    description=__description__ + "\n" + __author__,
)
argp.add_argument(
    '--version',
    action='version',
    version='%(prog)s ' + __version__,
    help='Shows version and exits.',
)
argp.add_argument(
    "word",
    metavar="word",
    type=str,
    help="Target word or regex pattern string",
)
argp.add_argument(
    "-f",
    metavar="folder",
    type=str,
    default=".",
    help="Target folder",
)
argp.add_argument(
    "-p",
    metavar="prefix",
    type=str,
    default="function|module",
    help="prefix. Could be a str in regexp form. Default='function|module'",
)
argp.add_argument(
    "-a",
    metavar="activeModule",
    type=_str2bool,  # was type=bool, which turned "-a 0" into True
    default=False,
    help="Search for modules that are set to run at this moment by matching 'xxx(...);'. Useful for complex lib.",
)
argp.add_argument(
    "-i",
    metavar="includePrefix",
    type=int,
    default=1,
    help="Int. 1 = Include prefix defined in -p",
)
argp.add_argument(
    '-x',
    metavar="fileExt",
    type=str,
    default="scad",
    help="Specifiy file extension like doc, py ... etc. Default:scad",
)
args = argp.parse_args()
# Derive the search pattern from the parsed arguments.
#   -a set : ignore `word` and look for active module calls, i.e. lines
#            that start with `name(...);`
#   -i set : require one of the -p prefixes (plus a space) before `word`
#   else   : search for `word` anywhere in the line
foldername = args.f
word = args.word
if args.a:
    # Raw string: the backslashes belong to the regex, not to Python's
    # string-escape syntax (non-raw "\w", "\(" ... trigger warnings).
    ptn_str = r"^\w*\([\w=\.\-,\[\] ]*\);"
elif args.i:
    ptn_str = "^(" + args.p + ") " + word
else:
    ptn_str = word
try:
    pattern = re.compile(ptn_str)
except re.error as exc:
    # `word` / -p are user-supplied regex fragments; report a bad one as a
    # usage error instead of dumping a traceback.
    argp.error("invalid search pattern %r: %s" % (ptn_str, exc))
#print("ptn_str: '%s'"%ptn_str) | |
# Open the results box, then reset the bookkeeping used by the scan.
print('+' + '====' * 20)
print('| Matches:')

mcount = 0   # total number of matching lines
fcount = 0   # number of files examined
errors = []  # one message per file that could not be processed
files = {}   # full path -> number of matching lines in that file
def isValidFile(fn):
    """Tell whether file name `fn` should be searched, judged by extension.

    When an extension filter is set (args.x is non-empty) only names ending
    in ".<args.x>" qualify. Without a filter, every name qualifies except
    those ending in one of the EXCLUDEFILES extensions.
    """
    wanted = args.x
    if wanted:
        return fn.endswith("." + wanted)
    return not any(fn.endswith("." + ext) for ext in EXCLUDEFILES)
# Scan each regular file directly inside `foldername` (no recursion) whose
# name passes isValidFile(), printing matching lines grouped by file.
for f in os.listdir(foldername):
    fullpath = os.path.join(foldername, f)
    if not (os.path.isfile(fullpath) and isValidFile(f)):
        continue
    fcount = fcount + 1
    thisFileHasMatches = False
    try:
        # Read the whole file up front so a decoding failure is recorded
        # before any output for this file; `with` guarantees the handle is
        # closed even when read() raises.
        with open(fullpath, "r") as fh:
            lines = fh.read().split("\n")
        # Number lines from 1 so reported positions agree with editors.
        for i, ln in enumerate(lines, 1):
            if not pattern.search(ln):
                continue
            if not thisFileHasMatches:
                # First hit in this file: print the file name as a header.
                print("| "+f+":")
                thisFileHasMatches = True
            mcount = mcount + 1
            files[fullpath] = files.get(fullpath, 0) + 1
            # Without the prefix (-i 0) the line is shown as-is, keeping its
            # indentation; with the prefix it is stripped.
            shown = ln if args.i == 0 else ln.strip()
            print("| #%s: %s " % (i, shown))
    except Exception:
        # Best effort: remember the failure (last traceback line, e.g.
        # "UnicodeDecodeError: ...") and move on to the next file.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        errors.append(
            "'%s': %s" % (f, traceback.format_exc().split("\n")[-2])
        )
# Close the results box and print the summary: what was searched, totals,
# per-file match counts, and the files that could not be processed.
print('+' + '====' * 20)
print("::: Matching '%s' in folder '%s':" % (ptn_str, foldername))
print("::: Arguments %s" % args)

matched_file_count = len(files)
print('\nMatch %s times in %s files (see above for details):\n' % (mcount, matched_file_count))

per_file_report = ['%s: %s' % (path, hits) for path, hits in files.items()]
print('\n'.join(per_file_report))

print('\nWith %s file errors:' % len(errors))
error_report = ["(%i)%s" % (idx, msg) for idx, msg in enumerate(errors)]
print('\n'.join(error_report))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment