Skip to content

Instantly share code, notes, and snippets.

@quandyfactory
Created January 24, 2012 19:02
Show Gist options
  • Save quandyfactory/1671909 to your computer and use it in GitHub Desktop.
Save quandyfactory/1671909 to your computer and use it in GitHub Desktop.
Returns the total, nonblank and net lines of code for a Python script or a folder of Python scripts.
#!/usr/bin/env python
# coding: utf-8
"""
Calculates total, nonblank and net lines of code for Python scripts.
"""
import os
import re
def get_line_count(blob):
    """Return the number of lines in *blob*.

    An empty string has zero lines, and a newline that terminates the
    final line does not start an additional (empty) line, so ``'a\\nb'``
    and ``'a\\nb\\n'`` both count as two lines.
    """
    if not blob:
        return 0
    count = blob.count('\n')
    # A final line without a terminating newline still counts as a line.
    if not blob.endswith('\n'):
        count += 1
    return count
def strip_docstring(blob):
    """Remove triple-quoted strings that begin on their own line.

    A string is removed together with the newline and indentation that
    precede it, so the line it occupied disappears entirely. Both
    triple-double-quoted and triple-single-quoted strings are handled,
    they may span several lines, and they may contain single or double
    quote characters.

    NOTE: this is a regex heuristic, not a parser. Any triple-quoted
    string that starts a line is treated as a docstring, and a closing
    delimiter that sits on its own line (e.g. the end of a multi-line
    string assignment) can be mistaken for an opening one.
    """
    # The backreference forces the closing delimiter to match the opening
    # one; the lazy ``.*?`` (with DOTALL) stops at the first closing
    # delimiter while still allowing embedded quote characters.
    quoted = r'("""|\'\'\').*?\1'

    # A docstring on the very first line has no preceding newline, so it
    # is handled separately; the newline that follows it is dropped so no
    # empty line is left behind.
    leading = re.compile(r'[ \t]*' + quoted, re.DOTALL).match(blob)
    if leading:
        blob = blob[leading.end():]
        if blob.startswith('\n'):
            blob = blob[1:]

    return re.compile(r'\n\s*' + quoted, re.DOTALL).sub('', blob)
def strip_blanklines(blob):
    """Return *blob* with every empty or whitespace-only line removed."""
    kept = []
    for candidate in blob.split('\n'):
        # A line that is empty after stripping whitespace is blank.
        if candidate.strip():
            kept.append(candidate)
    return '\n'.join(kept)
def strip_comments(blob, delim='#'):
    """Remove whole-line comments from *blob*.

    A line is dropped when its first non-whitespace text is *delim*.
    Trailing comments that follow code on the same line are kept. *delim*
    may be more than one character long (e.g. ``'//'``). Blank lines are
    preserved, and an empty *delim* leaves *blob* unchanged.
    """
    if not delim:
        # Every string starts with '', so an empty delimiter would
        # otherwise discard every line.
        return blob
    # startswith() is safe on empty lines (indexing [0] is not) and
    # compares the full delimiter rather than only one character.
    return '\n'.join(
        line for line in blob.split('\n')
        if not line.strip().startswith(delim)
    )
def loc(blob, delim='#'):
    """Count the lines of *blob* three ways.

    Returns a dict with the keys ``'total'`` (line count of the raw text),
    ``'nonblank'`` (after blank lines are removed) and ``'net'`` (after
    docstrings and lines starting with *delim* are removed as well).
    """
    counts = {'total': get_line_count(blob)}

    without_blanks = strip_blanklines(blob)
    counts['nonblank'] = get_line_count(without_blanks)

    # Docstrings are removed before comment lines, on the blank-free text.
    code_only = strip_comments(strip_docstring(without_blanks), delim)
    counts['net'] = get_line_count(code_only)
    return counts
def get_folder_total(path):
    """Sum the line counts of every ``.py`` file directly inside *path*.

    Sub-directories are not descended into. Returns a dict with the keys
    ``'total'``, ``'nonblank'`` and ``'net'``, each holding the sum of the
    corresponding value returned by ``loc`` for every file.
    """
    total = {'net': 0, 'total': 0, 'nonblank': 0}
    for filename in os.listdir(path):
        fullpath = os.path.join(path, filename)
        # Skip non-Python entries, and directories that merely have a
        # name ending in ".py" (open() would fail on those).
        if not filename.endswith('.py') or not os.path.isfile(fullpath):
            continue
        with open(fullpath, 'r') as thisfile:
            blob = thisfile.read()
        for k, v in loc(blob).items():
            total[k] += v
    return total
if __name__ == '__main__':
    # Command-line entry point: report the line counts for a single file.
    import sys

    args = sys.argv
    rules = """
Command line arguments:
-f - File to be tested (required). Filename if in current directory, or else full path.
-c - Character(s) used to delimit a comment (optional - default is #).
"""
    if len(args) == 1:
        sys.exit(rules)
    # print('') rather than a bare print so the script behaves the same
    # on Python 2 and Python 3.
    print('')

    # Collect "-x value" pairs keyed by the flag letter. Using the
    # position from enumerate (rather than args.index) keeps repeated
    # flags paired with their own values; a flag with no following value
    # is ignored.
    argdict = {}
    for position, arg in enumerate(args):
        if position == 0:
            continue  # argv[0] is the script name, not an option
        if len(arg) >= 2 and arg[0] == '-' and position + 1 < len(args):
            argdict[arg[1]] = args[position + 1]

    if 'f' not in argdict:
        sys.exit('Error: no filename (-f) in command line arguments.\n\n%s' % (rules))
    if 'c' not in argdict:
        argdict['c'] = '#'  # default comment delimiter

    # A missing or unreadable file is reported with the same message as an
    # empty one instead of an uncaught traceback.
    try:
        with open(argdict['f'], 'r') as thisfile:
            blob = thisfile.read()
    except (IOError, OSError):
        blob = ''
    if not blob:
        sys.exit("File %s does not exist or cannot be opened." % (argdict['f']))

    results = loc(blob, argdict['c'])
    print('')
    print("---------------------------------------------")
    print("Results for %s" % (argdict['f']))
    print('')
    for k, v in results.items():
        print('%s: %s' % (k, v))
    print('')
    print("---------------------------------------------")
    print('')
    sys.exit()
@rplzzz
Copy link

rplzzz commented Jun 4, 2015

I don't think line 18 does what you expect it to. Repeating characters in a character set has no additional effect, so the character set [^"""] has the same meaning as [^"]. Therefore, any single instance of a " will cause the match to fail, and indeed if you test it that pattern fails to match a string like """foo "bar" baz""", which is a valid triple-quoted string.

What you want for this situation is a "zero-width assertion". A regexp like r'([^"]|"(?!""))*' will match an arbitrarily long sequence of characters that aren't quotes, or characters that are, provided that they aren't followed by two more quotes. Add the triple quotes on either side, and that should catch docstrings that have double-quote characters embedded in them.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment