sanpingz/bc_grep.py

## bc_grep.py
#!/usr/bin/env python
"""Static code analysis for C/C++,
Grep out the bad codes in the C/C++ file,
The premise is that codes were executable.
    Usage: bc_grep <C/C++ file>/<C/C++ dir> <-s> <-p>
    '-s' is optional and means simple output. By default the detail of each bad comment is displayed.
    '-p' is optional, means the first arg is a file with dirs or files list in it"""
__author__ = 'sanpingz (sanping.zhang@alcatel-lucent.com)'

import re
import sys
import os
from os.path import join

# Regular expressions used by the checkers.  They are compiled into ``reg``
# below and the rest of the file refers to them by index.
pattern = (
	# [0] Tokenizer: a preprocessor "#if ... #endif" region, a "//" comment,
	#     a double-quoted string, or a block comment.  Only the block comment
	#     is captured (group 1).  The DOTALL flag sits at the very start:
	#     Python 2 applied a mid-pattern "(?s)" to the whole expression anyway,
	#     and Python 3.11+ rejects global flags that are not at the start.
	r'(?s)(?:#\s*if.*?#\s*endif)|(?:/(?:\\\n)?/.*?\n)|(?:".*?")|(/(?:\\\n)?\*.*?\*(?:\\\n)?/)',	# /* code /* code */
	# [1] A "/*" opener anywhere except the very start of the searched text.
	r'(?<!\A)/(?:\\\n)?\*',
	# [2] A line that begins (after optional whitespace) with word characters, "+" or "-".
	r'(?m)^\s*[\w+\-]+',
	# [3] The start of every line (used to indent printed blocks).
	r'(?m)^',
	# [4] A "//" comment at line start that ends with a backslash (or the "??/"
	#     trigraph) and is followed by a line starting with word characters.
	r'(?m)^/(?:\\\n)?/.*?(?:\\|\?\?/)\n\s*[\w+\-]+',	# // end with backslash
	# [5] A line break (LF or CRLF).
	r'\r?\n',
	# [6] An "if" whose condition contains an assignment.
	r'\bif\b[ \t]*\([^"(]*?\b\w+\b[ \t]*=(?:[ \t]*!)?[ \t]*\b\w+\b.*?\)',	# if(a=b), if(a=!b)
	# [7] An "if" whose condition chains two relational operators.
	r'\bif\b[ \t]*\([^"(]*?\b\w+\b[ \t]*[><](?:[ \t]*=)?[ \t]*\b\w+\b[ \t]*[><](?:[ \t]*=)?[ \t]*\b\w+\b.*?\)',	# if( 0 < a < 5)
	# [8] A file-name suffix made of one or more "c"/"cpp" groups, e.g. ".c", ".cpp".
	r'\.(?:c|cpp)+$'
	)

# A real list rather than a lazy ``map`` object, so that ``reg[i]`` also works
# on Python 3.
reg = [re.compile(p) for p in pattern]
# Running total of findings, incremented by the checker functions.
num = 0


def register():
	"""Return the tuple of checker functions that process() applies to a file.

	Only ``comments`` and ``backslash`` are enabled; ``carelessif`` exists in
	this file but is left out of the tuple.
	"""
	enabled = (comments, backslash)
	return enabled


def comments(lines, fn, mode):
	"""Report block comments that contain a nested ``/*`` and a code-like line.

	Args:
		lines: full text of the file being checked.
		fn: file name, used only for reporting.
		mode: reporting mode, passed through to output().

	Each finding increments the module-level ``num`` counter.
	"""
	global num
	for match in reg[0].finditer(lines):
		block = match.group(1)
		if not block:
			# The tokenizer matched a preprocessor region, a // comment or a
			# string literal; only block comments are captured in group 1.
			continue
		cm = block.strip()
		if reg[1].search(cm) and reg[2].search(cm):
			num += 1
			# Derive the line number from the match offset.  Searching for the
			# comment text again would report the first occurrence when the
			# same comment appears more than once.
			loc = lines.count('\n', 0, match.start(1)) + 1
			output(fn, cm, lines=lines, mode=mode, loc=loc)

def backslash(lines, fn, mode):
	"""Report ``//`` comments whose trailing backslash continues onto the next line.

	Args:
		lines: full text of the file being checked.
		fn: file name, used only for reporting.
		mode: reporting mode, passed through to output().

	Each finding increments the module-level ``num`` counter.
	"""
	global num
	for match in reg[4].finditer(lines):
		num += 1
		block = match.group(0).strip()
		# Derive the line number from the match offset.  Searching for the text
		# again would report the first occurrence when it appears repeatedly.
		loc = lines.count('\n', 0, match.start(0)) + 1
		output(fn, block, lines=lines, mode=mode, loc=loc)

def carelessif(lines, fn, mode):
	"""Report lines holding an ``if`` with an assignment or a chained comparison.

	The text is split on line breaks and each line is tested against the two
	"careless if" patterns; a hit increments ``num`` and is reported with its
	1-based line number.
	"""
	global num
	for lineno, text in enumerate(reg[5].split(lines), 1):
		suspicious = reg[6].search(text) or reg[7].search(text)
		if not suspicious:
			continue
		num += 1
		output(fn, text.strip(), mode=mode, loc=lineno)


def process(lines, fn, mode):
	"""Run every checker returned by register() over the text of one file."""
	checkers = register()
	for check in checkers:
		check(lines, fn, mode)

def output(fn, block, lines='', mode='-s', loc=0):
	"""Print one finding: its location and, in '-s' mode, the offending text.

	Args:
		fn: file name shown in the report.
		block: the offending text.
		lines: full file text; used to locate ``block`` when ``loc`` is not given.
		mode: when equal to '-s', the block itself is printed after the
			location, followed by a separator line.
		loc: 1-based line number; if falsy it is derived from the first
			occurrence of ``block`` in ``lines``.
	"""
	if not loc:
		# Plain substring search; no regex escaping needed.
		idx = lines.find(block)
		# If the text is not found, report line 0 instead of raising.
		loc = lines.count('\n', 0, idx) + 1 if idx >= 0 else 0
	# Parenthesised single-argument print works on both Python 2 and 3.
	print('%s@[line:%d]' % (fn, loc))
	if '-s' == mode:
		block = '[' + block + ']'
		# reg[3] matches every line start, so each line gets a leading tab.
		print(reg[3].sub('\t', block))
		print('=' * 80)

def iscpp(fn):
	"""Test ``fn`` against the file-suffix pattern (the last entry of ``reg``).

	Returns the match object when the name matches, otherwise None.
	"""
	suffix_re = reg[-1]
	return suffix_re.search(fn)

def check_file(fn, mode='-s'):
	"""Read the file ``fn`` and run all checkers on its contents.

	Args:
		fn: path of the file to check.
		mode: reporting mode, passed through to process().

	Empty files are skipped.
	"""
	with open(fn) as f:
		lines = f.read()
	# The file is already closed here; the checkers only need the text.
	if lines:
		process(lines, fn, mode)

def check_dir(dn, mode='-s'):
	"""Walk the directory tree under ``dn`` and check every file accepted by iscpp().

	Args:
		dn: root directory to walk.
		mode: reporting mode, passed through to check_file().
	"""
	for root, _subdirs, names in os.walk(dn):
		for name in names:
			if not iscpp(name):
				continue
			check_file(join(root, name), mode=mode)


if __name__ == '__main__':
	# Command-line entry point: check one file, one directory tree, or (with
	# '-p') every path listed in a text file, then print a summary.
	if len(sys.argv) < 2:
		print(__doc__)
		sys.exit(1)

	# Passing '-s' on the command line selects the terse report.  Internally
	# that is mode '-d', because output() prints the detail when mode == '-s'.
	m = '-d' if '-s' in sys.argv else '-s'
	fn = sys.argv[1]
	if os.path.isfile(fn):
		if '-p' in sys.argv:
			# The first argument is a list file with one path per line.
			with open(fn) as f:
				for line in f:
					# Drop the line terminator: with it attached, isfile()
					# and isdir() never recognise the path.
					path = line.strip()
					if not path:
						continue
					if os.path.isfile(path) and iscpp(path):
						check_file(path, mode=m)
					elif os.path.isdir(path):
						check_dir(path, mode=m)
		elif iscpp(fn):
			check_file(fn, mode=m)
	elif os.path.isdir(fn):
		check_dir(fn, mode=m)

	# Parenthesised single-argument print works on both Python 2 and 3.
	if num:
		print('total find: %d' % num)
	else:
		print('pass')
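	# NOTE(review): the lines from here to the end of the file are a whitespace-mangled,
	# markdown-escaped duplicate of the script above; they are not valid Python.
	#!/usr/bin/env python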
	"""Static code analysis for C/C++,
	Grep out the bad codes in the C/C++ file,
	The premise is that codes were executable.
	Usage: bc_grep <C/C++ file>/<C/C++ dir> <-s> <-p>
	'-s' is optional, means simple. Defalut will display bad comments detail.
	'-p' is optional, means the first arg is a file with dirs or files list in it"""
	__author__ = 'sanpingz (sanping.zhang@alcatel-lucent.com)'

	import re
	import sys
	import os
	from os.path import join

	pattern = (
	r'(?:(?s)#\sif.?#\sendif)\|(?:/(?:\\\n)?/.?\n)\|(?:(?s)".?")\|((?s)/(?:\\\n)?\.?\(?:\\\n)?/)', # /* code /* code */
	r'(?<!\A)/(?:\\\n)?\*',
	r'(?m)^\s*[\w+\-]+',
	r'(?m)^',
	r'(?m)^/(?:\\\n)?/.?(?:\\\|\?\?/)\n\s[\w+\-]+', # // end with backslash
	r'\r?\n',
	r'\bif\b[ \t]\([^"(]?\b\w+\b[ \t]=(?:[ \t]!)?[ \t]\b\w+\b.?\)', # if(a=b), if(a=!b)
	r'\bif\b[ \t]\([^"(]?\b\w+\b[ \t][><](?:[ \t]=)?[ \t]\b\w+\b[ \t][><](?:[ \t]=)?[ \t]\b\w+\b.*?\)', # if( 0 < a < 5)
	r'\.(?:c\|cpp)+$'
	)

	reg = map(re.compile, pattern)
	num = 0


	def register():
	return (
	comments,
	backslash,
	#carelessif
	)


	def comments(lines, fn, mode):
	global num
	for tup in reg[0].finditer(lines):
	# macro, quota, single, block = tup.groups()
	# if not macro and not quota and not single and block:
	block = tup.group(1)
	if block:
	cm = block.strip()
	if reg[1].search(cm) and reg[2].search(cm):
	num += 1
	output(fn, cm, lines=lines, mode=mode)

	def backslash(lines, fn, mode):
	global num
	for tup in reg[4].finditer(lines):
	num += 1;
	block = tup.group(0).strip()
	output(fn, block, lines=lines, mode=mode)

	def carelessif(lines, fn, mode):
	global num
	count = 0
	for line in iter(reg[5].split(lines)):
	count += 1
	if reg[6].search(line) or reg[7].search(line):
	num += 1
	output(fn, line.strip(), mode=mode, loc = count)


	def process(lines, fn, mode):
	for func in register():
	func(lines, fn, mode)

	def output(fn, block, lines='', mode='-s', loc=0):
	if not loc:
	loc = re.search(re.escape(block), lines).start(0)
	loc = lines[:loc].count('\n') + 1
	print '%s@[line:%d]' % (fn, loc)
	if '-s' == mode:
	block = '[' + block +']'
	print reg[3].sub('\t', block)
	print '='*80

	def iscpp(fn):
	return reg[-1].search(fn)

	def check_file(fn, mode='-s'):
	cm = ''
	with open(fn) as f:
	lines = f.read()
	if lines:
	process(lines, fn, mode)

	def check_dir(dn, mode='-s'):
	gen = os.walk(dn)
	for base, dns, fns in gen:
	for fn in fns:
	if iscpp(fn):
	check_file(join(base, fn), mode=mode)


	if __name__ == '__main__':
	if len(sys.argv) < 2:
	print __doc__
	sys.exit(1)
	else:
	m = '-d' if '-s' in sys.argv else '-s'
	fn = sys.argv[1]
	if os.path.isfile(fn):
	if '-p' in sys.argv:
	with open(fn) as f:
	for line in iter(f.readline, ''):
	if os.path.isfile(line) and iscpp(line):
	check_file(line, mode=m)
	elif os.path.isdir(line):
	check_dir(line, mode=m)
	elif iscpp(fn):
	check_file(fn, mode=m)

	elif os.path.isdir(fn):
	check_dir(fn, mode=m)

	if num:
	print 'total find: %d' % num
	else:
	print 'pass'