Created
April 7, 2013 04:02
-
-
Save makto/5328941 to your computer and use it in GitHub Desktop.
统计项目中 c 源文件中的注释行数,并去除所有注释
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python2.7
#-*- coding: utf-8 -*-
r"""Strip comments from C source files.

Also counts, for every file, the number of lines that contain comments.

Comments that are split by a line-continuation backslash are handled:

CASE 1:
    int a; /\
    / this is a comment
CASE 2:
    int b; //\
    this is also a comment
CASE 3:
    int c; /\
    * this is still a comment */
"""
# NOTE: the docstring above is a raw string on purpose; in a normal string
# the backslash-newline pairs in the examples would be swallowed.
import os
from os.path import join as dirsjoin
import shutil

# File-name suffixes treated as C sources; passed to str.endswith().
C_SUFFIX = ('.h', '.c')
class LineBox(object):
    """Line processor that works on one "semantic line" at a time.

    A semantic line is a run of physical lines that has to be looked at
    together: it ends only when no string literal or block comment is open
    and the last physical line does not end with a continuation backslash.

    Usage: feed physical lines with add(); once complete() is true, fetch
    the comment-free text with getlines() and call count_and_clear().

    Comment spans are stored per physical line in ``seps`` as a flat list
    ``[start, stop, start, stop, ...]`` of inclusive character indices into
    the original line.  Special values:

    * a trailing ``start`` without ``stop``: delete to the end of the line,
      line terminator included;
    * ``stop == -2``: delete up to, but not including, the trailing newline
      (used for ``//`` comments so the line break survives);
    * ``start == -3``: the comment opener was split by a continuation, so
      the span starts at the ``/`` right before the trailing backslash
      (the line is assumed to end with ``/``, backslash, newline).

    NOTE: only double-quoted string literals are tracked; a character
    literal containing a double quote is not recognised as such.
    """

    def __init__(self):
        self.lines = []       # physical lines of the current semantic line
        self.seps = []        # per-line comment spans, parallel to lines
        self.quote = 0        # 1 while inside a double-quoted string
        self.block = False    # inside a /* ... */ comment
        self.lcmmt = False    # inside a // comment
        self.conti = False    # last added line ended with a backslash
        self._bslash = False  # previous char is an unescaped backslash
        self._slash = False   # previous char is a '/' usable as an opener
        self._star = False    # previous char is a '*' usable as a closer

    def _parse(self, line):
        """Scan one right-stripped physical line and record comment spans.

        Updates the quote/comment state and appends span markers to
        ``self.seps[-1]`` (or ``self.seps[-2]`` when the comment opener
        started on the previous, continued line).
        """
        # The "previous character" flags may only survive a line break
        # that was spliced away by a continuation backslash.
        if not self.conti:
            self._bslash = self._slash = self._star = False

        # Detect and drop the continuation backslash.
        if line.endswith('\\'):
            self.conti = True
            line = line[:-1]
        else:
            self.conti = False

        seps = self.seps[-1]
        for idx, char in enumerate(line):
            opened = closed = False
            # String literals only matter outside of comments.
            if not self.cmmt():
                if char == '"' and not self._bslash:
                    self.quote = (self.quote + 1) % 2
            if not self.quote:
                if char == '*' and self._slash and not self.cmmt():
                    # Block comment starts.
                    self.block = True
                    opened = True
                    if idx == 0:
                        # The '/' is on the previous (continued) line.
                        self.seps[-2].append(-3)
                    else:
                        seps.append(idx - 1)
                elif char == '/' and self._star and self.block:
                    # Block comment ends.
                    self.block = False
                    closed = True
                    if not seps:
                        # The comment was opened on an earlier line.
                        seps.append(0)
                    seps.append(idx)
                elif char == '/' and self._slash and not self.cmmt():
                    # Line comment starts.
                    self.lcmmt = True
                    if idx == 0:
                        # First '/' is on the previous line; keep that
                        # line's newline so following code is not merged.
                        self.seps[-2].extend([-3, -2])
                    elif idx == 1:
                        # Comment-only line: remove it entirely.
                        seps.append(0)
                    else:
                        seps.extend([idx - 1, -2])
            # A backslash escaped by another backslash escapes nothing.
            self._bslash = char == '\\' and not self._bslash
            # The '/' closing a block must not open a new comment, and the
            # '*' opening a block must not close it ("/*/" is still open).
            self._slash = char == '/' and not closed
            self._star = char == '*' and not opened

        # The whole line lies inside a comment.
        if self.cmmt() and not seps:
            seps.append(0)
        # A line comment ends with the line unless it is continued.
        if not self.conti:
            self.lcmmt = False

    def add(self, line):
        """Append one physical line (with its line terminator) and parse it."""
        self.lines.append(line)
        self.seps.append([])  # keep seps and lines the same length
        self._parse(line.rstrip())

    def complete(self):
        """Return True when no quote, block comment or continuation is open."""
        return not (self.quote or self.block or self.conti)

    def cmmt(self):
        """Return True while inside a block comment or a line comment."""
        return self.block or self.lcmmt

    def getlines(self):
        """Return the stored lines with all recorded comment spans removed.

        Must only be called when complete() is true.
        """
        assert self.complete()
        newlines = []
        for text, seps in zip(self.lines, self.seps):
            chars = list(text)
            starts = seps[::2]
            stops = seps[1::2]
            if len(stops) < len(starts):
                stops = stops + [None]  # open-ended last span
            # Delete from the right so earlier indices stay valid.
            for start, stop in reversed(list(zip(starts, stops))):
                if stop is None:
                    del chars[start:]
                elif stop == -2:
                    # Keep the trailing newline, if the line has one.
                    if text.endswith('\n'):
                        end = len(text) - 1
                    else:
                        end = len(text)
                    del chars[start:end]
                else:
                    del chars[start:stop + 1]
            newlines.append(''.join(chars))
        return newlines

    def count_and_clear(self):
        """Reset for the next semantic line.

        Returns the number of stored lines that contained comment text.
        """
        lines_with_cmmt = len([sep for sep in self.seps if sep])
        self.__init__()
        return lines_with_cmmt
def count_del_cmmt(origin, fresh):
    """Copy `origin` to `fresh` with comments removed.

    `origin` is an iterable of lines (e.g. an open file) and `fresh` an
    object with a writelines() method.  Lines are grouped by a LineBox and
    written out each time a semantic line is complete.

    Returns the number of lines that contained comment text.
    """
    linebox = LineBox()
    cmmt_count = 0
    for raw_line in origin:
        linebox.add(raw_line)
        # Write as soon as the semantic line is complete; otherwise keep
        # reading physical lines into the same box.
        if linebox.complete():
            fresh.writelines(linebox.getlines())
            cmmt_count += linebox.count_and_clear()

    if linebox.lines:
        # The input ended in the middle of a semantic line (unterminated
        # block comment or string, or a trailing continuation).  Close the
        # open state by hand so the buffered lines are written, minus the
        # comments recognised so far, instead of being silently dropped.
        linebox.quote = 0
        linebox.block = False
        linebox.conti = False
        fresh.writelines(linebox.getlines())
        cmmt_count += linebox.count_and_clear()
    return cmmt_count
def main():
    """Interactively strip comments from every C file of a project.

    Reads a directory path from standard input (re-prompting until it is an
    existing directory), mirrors the tree into a sibling `<name>_nc/`
    directory with comments removed from files ending in C_SUFFIX (other
    files are copied unchanged), and writes the per-file and total
    comment-line counts to a sibling `<name>.log` file and to stdout.

    os.mkdir() raises if the `<name>_nc/` directory already exists.
    """
    # Single-argument print(...) and this shim keep the script parseable
    # and runnable on both Python 2 and Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print('请输入项目的绝对路径:')
    print('(去除注释后的项目文件将保存在`name_nc`目录下)')
    while True:
        src_path = read_line()
        if os.path.isdir(src_path):
            break
        print('无此目录,请重新输入:\n')

    # Create the directory that receives the comment-free copy.
    root_path, prj_dir = os.path.split(src_path.rstrip('/'))
    new_path = dirsjoin(root_path, prj_dir + '_nc/')
    os.mkdir(new_path)
    if not src_path.endswith('/'):
        src_path += '/'

    # Comment-line statistics go to the log file.
    log_path = dirsjoin(root_path, prj_dir + '.log')
    prefix_len = len(src_path)
    cmmt_total = 0
    with open(log_path, 'w') as log:
        print('-' * 30)
        for root, dirs, files in os.walk(src_path):
            new_root = dirsjoin(new_path, root[prefix_len:])
            # Recreate the sub-directories.
            for d in dirs:
                os.mkdir(dirsjoin(new_root, d))
            # Copy the files.
            for f in files:
                f_path = dirsjoin(root, f)
                f_path_new = dirsjoin(new_root, f)
                if f.endswith(C_SUFFIX):
                    # C sources get their comments removed first.
                    with open(f_path, 'r') as origin:
                        with open(f_path_new, 'w') as fresh:
                            cmmt_count = count_del_cmmt(origin, fresh)
                    # Strip only the leading project prefix; str.replace
                    # would also remove later occurrences inside the path.
                    rel_path = f_path[prefix_len:]
                    log_data = "%s: %s" % (rel_path, cmmt_count)
                    print(log_data)
                    log.write(log_data + '\n')
                    cmmt_total += cmmt_count
                else:
                    # Anything else is copied as is.
                    shutil.copy(f_path, f_path_new)
        close_line = '\n项目总注释行数%s\n' % cmmt_total
        print(close_line)
        log.write(close_line)
    print('处理完毕,结果记录在%s文件中' % log_path)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment