Last active
November 1, 2015 20:50
-
-
Save hugbug/102bae8f586264dde61f to your computer and use it in GitHub Desktop.
nzbget/nzbget#103: Source code naming convention: Renaming tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Identifier renaming tool to refactor nzbget source code. | |
# | |
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net> | |
# | |
# This program is free software; you can redistribute it and/or modify it | |
# under the terms of the GNU Lesser General Public License as published by | |
# the Free Software Foundation; either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU Lesser General Public License for more details. | |
# | |
# You should have received a copy of the GNU Lesser General Public License | |
# along with the program. If not, see <http://www.gnu.org/licenses/>. | |
# | |
import sys | |
import os | |
import lex | |
# Count of lexical errors seen so far (incremented by t_error, reported at exit).
errors = 0

# Token names understood by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' accepted as a compiler extension).
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hexadecimal or decimal integer literal.
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    # PLY convention: the docstring below is the token's regular expression.
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # Keep line numbering correct across multi-line /* */ comments.
    t.lexer.lineno += t.value.count('\n')
    # Returning nothing discards the token.
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

# Error handling rule
def t_error(t):
    # Report the offending character, count the error, and resynchronize
    # by skipping a single character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
# Acronyms whose spelling should be normalized (e.g. 'NZB' -> 'Nzb').
acronyms = ('NZB', 'URL', 'ID', 'TLS', 'NNTP', 'IP')

def is_renamable(token):
    """Return True if the identifier mentions any of the known acronyms."""
    # rename only tokens having acronyms in their names; leave the rest alone
    return any(ac in token for ac in acronyms)

def construct_new_name(name):
    """Return the normalized replacement for 'name', or None.

    All-uppercase names (constants/macros) are left alone.  Each embedded
    acronym is rewritten with only its first letter capitalized
    ('NZB' -> 'Nzb').  None means "no rename needed".
    """
    if name == name.upper():
        return None
    new_name = name
    for ac in acronyms:
        if ac in new_name:
            new_name = new_name.replace(ac, ac[0] + ac[1:].lower())
    # '<>' is Python-2-only syntax (removed in Python 3); '!=' works in both.
    return new_name if name != new_name else None
def collect_names(content):
    """Build a mapping old-name -> new-name for every renamable WORD token."""
    lex.input(content)
    # Pass 1: gather the distinct identifiers that qualify for renaming.
    seen = set()
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD' and is_renamable(tok.value):
            seen.add(tok.value)
        tok = lex.token()
    # Pass 2: keep only those for which a replacement could be derived.
    renames = {}
    for old in seen:
        fresh = construct_new_name(old)
        if fresh:
            renames[old] = fresh
    return renames
def perform_rename(content, names):
    """Return 'content' with every occurrence of the mapped names replaced."""
    lex.input(content)
    result = content
    shift = 0  # net length change introduced by the replacements so far
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD':
            fresh = names.get(tok.value)
            if fresh:
                # Splice the replacement in at the token's shifted position.
                start = tok.lexpos + shift
                result = result[:start] + fresh + result[start + len(tok.value):]
                shift += len(fresh) - len(tok.value)
        tok = lex.token()
    return result
def process_file(filename):
    """Rename identifiers in 'filename' in place.

    Reads the whole file, computes the rename map, and rewrites the file
    with all occurrences replaced.
    """
    # 'with' guarantees the handles are closed even if an exception occurs;
    # the original left both files unclosed.
    with open(filename, 'rb') as src:
        content = src.read()
    names = collect_names(content)
    new_content = perform_rename(content, names)
    with open(filename, 'wb') as dst:
        dst.write(new_content)
# Build the lexer from the t_* rules defined above.
lex.lex()

# Single argument: a file to process, or a directory walked recursively.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as '.gitignore'
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)

# Final summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Identifier renaming tool to refactor nzbget source code. | |
# | |
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net> | |
# | |
# This program is free software; you can redistribute it and/or modify it | |
# under the terms of the GNU Lesser General Public License as published by | |
# the Free Software Foundation; either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU Lesser General Public License for more details. | |
# | |
# You should have received a copy of the GNU Lesser General Public License | |
# along with the program. If not, see <http://www.gnu.org/licenses/>. | |
# | |
import sys | |
import os | |
# Using PLY library by David M. Beazley (Dabeaz LLC) | |
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py" | |
# into the directory where this script is located. | |
import lex | |
# Count of lexical errors seen so far (incremented by t_error, reported at exit).
errors = 0

# Token names understood by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' accepted as a compiler extension).
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hexadecimal or decimal integer literal.
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    # PLY convention: the docstring below is the token's regular expression.
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # Keep line numbering correct across multi-line /* */ comments.
    t.lexer.lineno += t.value.count('\n')
    # Returning nothing discards the token.
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

# Error handling rule
def t_error(t):
    # Report the offending character, count the error, and resynchronize
    # by skipping a single character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """Return True for identifiers carrying the "g_" global-variable prefix."""
    # this revision only renames globals; everything else is untouched
    return token[:2] == "g_"
def construct_new_name(name):
    """Return the new name for a "g_" global, or None if no rule matches.

    'g_<prefix><Name>' becomes 'g_<Name>': the single-letter Hungarian
    type prefix (p/i/l/b/t/e) or the two-letter 'sz' string prefix is
    dropped and the first remaining letter is kept upper-case.
    """
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # The 'is not None' guards fix a crash: short names such as 'g_p' or
    # 'g_sz' used to raise AttributeError ('NoneType' has no 'upper').
    if ch1 == 'g' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'g_' + ch4.upper() + name[4:]
    elif ch1 == 'g' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'g_' + ch5.upper() + name[5:]
    return new_name
def collect_names(content):
    """Build a mapping old-name -> new-name for every renamable WORD token."""
    lex.input(content)
    # Pass 1: gather the distinct identifiers that qualify for renaming.
    seen = set()
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD' and is_renamable(tok.value):
            seen.add(tok.value)
        tok = lex.token()
    # Pass 2: keep only those for which a replacement could be derived.
    renames = {}
    for old in seen:
        fresh = construct_new_name(old)
        if fresh:
            renames[old] = fresh
    return renames
def perform_rename(content, names):
    """Return 'content' with every occurrence of the mapped names replaced."""
    lex.input(content)
    result = content
    shift = 0  # net length change introduced by the replacements so far
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD':
            fresh = names.get(tok.value)
            if fresh:
                # Splice the replacement in at the token's shifted position.
                start = tok.lexpos + shift
                result = result[:start] + fresh + result[start + len(tok.value):]
                shift += len(fresh) - len(tok.value)
        tok = lex.token()
    return result
def process_file(filename):
    """Rename (or, in dry-run mode, just report) "g_" globals in 'filename'."""
    # 'with' guarantees the handles are closed even if an exception occurs;
    # the original left both files unclosed.
    with open(filename, 'rb') as src:
        content = src.read()
    names = collect_names(content)
    if want_rename:
        new_content = perform_rename(content, names)
        with open(filename, 'wb') as dst:
            dst.write(new_content)
    else:
        # Dry run: list the planned renames instead of touching the file.
        for r in names:
            print('%s;%s' % (r, names[r]))
        print('------------')
        print('%s renamables' % len(names))
# Rename mode: True rewrites files, False only reports the planned renames.
want_rename = True

# Build the lexer from the t_* rules defined above.
lex.lex()

# Single argument: a file to process, or a directory walked recursively.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as '.gitignore'
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)

# Final summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Identifier renaming tool to refactor nzbget source code. | |
# | |
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net> | |
# | |
# This program is free software; you can redistribute it and/or modify it | |
# under the terms of the GNU Lesser General Public License as published by | |
# the Free Software Foundation; either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU Lesser General Public License for more details. | |
# | |
# You should have received a copy of the GNU Lesser General Public License | |
# along with the program. If not, see <http://www.gnu.org/licenses/>. | |
# | |
import sys | |
import os | |
# Using PLY library by David M. Beazley (Dabeaz LLC) | |
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py" | |
# into the directory where this script is located. | |
import lex | |
# Count of lexical errors seen so far (incremented by t_error, reported at exit).
errors = 0

# Token names understood by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' accepted as a compiler extension).
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hexadecimal or decimal integer literal.
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    # PLY convention: the docstring below is the token's regular expression.
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # Keep line numbering correct across multi-line /* */ comments.
    t.lexer.lineno += t.value.count('\n')
    # Returning nothing discards the token.
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Compute column.
# input is the input text string
# token is a token instance
def find_column(input, token):
    """Return the 1-based column of 'token' within 'input'.

    Fix: the previous version was 1-based for tokens after a newline but
    0-based on the first line.  Per the PLY documentation, the line start
    is rfind(...) + 1 (which also handles the no-newline case, rfind == -1).
    Note: parameter 'input' shadows the builtin, kept for interface
    compatibility.
    """
    line_start = input.rfind('\n', 0, token.lexpos) + 1
    column = (token.lexpos - line_start) + 1
    return column
# Error handling rule
def t_error(t):
    # Report the offending character, count the error, and resynchronize
    # by skipping a single character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """Decide whether 'token' is a candidate for renaming.

    All-lower-case names and names whose first character is upper-case are
    left alone; every remaining mixed-case name is a candidate.
    """
    lower_only = token.lower() == token
    upper_start = token[0].upper() == token[0]
    return not (lower_only or upper_start)
def construct_new_name(name):
    """Map a Hungarian-notation identifier to its camelCase replacement.

    Returns the new name, or None when no rule applies.  Handles a few
    hard-coded special cases first, then strips the single-letter type
    prefixes (p/i/l/b/t/e), the 'sz' string prefix, and their 'm_'
    member-variable variants; finally normalizes acronym spelling.
    """
    # Special cases where the mechanical rules would give a bad result.
    if name == 'bOK': return 'ok'
    if name == 'iID': return 'id'
    if name == 'm_iID': return 'm_id'
    if name == 'bDelete': return 'deleteObj'  # plain 'delete' is a C++ keyword
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # The 'is not None' / length guards fix AttributeError and IndexError
    # crashes on names shorter than the pattern under test (e.g. 'm_mutex').
    if ch1 in ['p', 'i', 'l', 'b', 't', 'e'] and ch2 is not None and ch2.upper() == ch2:
        new_name = ch2.lower() + name[2:]
    elif ch1 == 's' and ch2 == 'z' and ch3 is not None and ch3.upper() == ch3:
        new_name = ch3.lower() + name[3:]
    elif ch1 == 'm' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'm_' + ch4.lower() + name[4:]
    elif ch1 == 'm' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'm_' + ch5.lower() + name[5:]
    elif ch1 == 'm' and ch2 == '_' and ch3 is not None and ch3.upper() == ch3:
        new_name = 'm_' + ch3.lower() + name[3:]
    elif name[:7] == 'm_mutex' and ln > 7 and name[7].upper() == name[7]:
        new_name = 'm_' + name[7].lower() + name[8:] + 'Mutex'
    # '<>' is Python-2-only syntax (removed in Python 3).
    if new_name is not None:
        new_name = new_name.replace('ID', 'Id')
        new_name = new_name.replace('iD', 'id')
        new_name = new_name.replace('NZB', 'Nzb')
        new_name = new_name.replace('nZB', 'nzb')
        new_name = new_name.replace('URL', 'Url')
        new_name = new_name.replace('uRL', 'url')
    return new_name
def collect_names(content):
    """Build a mapping old-name -> new-name for every renamable WORD token."""
    lex.input(content)
    # Pass 1: gather the distinct identifiers that qualify for renaming.
    seen = set()
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD' and is_renamable(tok.value):
            seen.add(tok.value)
        tok = lex.token()
    # Pass 2: keep only those for which a replacement could be derived.
    renames = {}
    for old in seen:
        fresh = construct_new_name(old)
        if fresh:
            renames[old] = fresh
    return renames
def perform_rename(content, names):
    """Return 'content' with every occurrence of the mapped names replaced."""
    lex.input(content)
    result = content
    shift = 0  # net length change introduced by the replacements so far
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD':
            fresh = names.get(tok.value)
            if fresh:
                # Splice the replacement in at the token's shifted position.
                start = tok.lexpos + shift
                result = result[:start] + fresh + result[start + len(tok.value):]
                shift += len(fresh) - len(tok.value)
        tok = lex.token()
    return result
def process_file(filename):
    """Rename (or, in dry-run mode, just report) identifiers in 'filename'."""
    # 'with' guarantees the handles are closed even if an exception occurs;
    # the original left both files unclosed.
    with open(filename, 'r') as src:
        content = src.read()
    names = collect_names(content)
    if want_rename:
        new_content = perform_rename(content, names)
        with open(filename, 'w') as dst:
            dst.write(new_content)
    else:
        # Dry run: list the planned renames instead of touching the file.
        for r in names:
            print('%s;%s' % (r, names[r]))
        print('------------')
        print('%s renamables' % len(names))
# Rename mode: True rewrites files, False only reports the planned renames.
want_rename = True

# Build the lexer from the t_* rules defined above.
lex.lex()

# Single argument: a file to process, or a directory walked recursively.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # Skip hidden files such as '.gitignore' — this guard exists in
            # the sibling revisions of the tool but was dropped here.
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)

# Final summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Identifier renaming tool to refactor nzbget source code. | |
# | |
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net> | |
# | |
# This program is free software; you can redistribute it and/or modify it | |
# under the terms of the GNU Lesser General Public License as published by | |
# the Free Software Foundation; either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU Lesser General Public License for more details. | |
# | |
# You should have received a copy of the GNU Lesser General Public License | |
# along with the program. If not, see <http://www.gnu.org/licenses/>. | |
# | |
import sys | |
import os | |
# Using PLY library by David M. Beazley (Dabeaz LLC) | |
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py" | |
# into the directory where this script is located. | |
import lex | |
# Count of lexical errors seen so far (incremented by t_error, reported at exit).
errors = 0
# Count of rename collisions found by validate_names (reported at exit).
dupes = 0

# Token names understood by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' accepted as a compiler extension).
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hexadecimal or decimal integer literal.
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    # PLY convention: the docstring below is the token's regular expression.
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # Keep line numbering correct across multi-line /* */ comments.
    t.lexer.lineno += t.value.count('\n')
    # Returning nothing discards the token.
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Compute column.
# input is the input text string
# token is a token instance
def find_column(input, token):
    """Return the 1-based column of 'token' within 'input'.

    Fix: the previous version was 1-based for tokens after a newline but
    0-based on the first line.  Per the PLY documentation, the line start
    is rfind(...) + 1 (which also handles the no-newline case, rfind == -1).
    Note: parameter 'input' shadows the builtin, kept for interface
    compatibility.
    """
    line_start = input.rfind('\n', 0, token.lexpos) + 1
    column = (token.lexpos - line_start) + 1
    return column
# Error handling rule
def t_error(t):
    # Report the offending character, count the error, and resynchronize
    # by skipping a single character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """Decide whether 'token' is a candidate for renaming.

    All-lower-case names and names whose first character is upper-case are
    left alone; every remaining mixed-case name is a candidate.
    """
    lower_only = token.lower() == token
    upper_start = token[0].upper() == token[0]
    return not (lower_only or upper_start)
def construct_new_name(name):
    """Map a Hungarian-notation identifier to its camelCase replacement.

    Returns the new name, or None when no rule applies.  Handles a few
    hard-coded special cases first, then strips the single-letter type
    prefixes (p/i/l/b/t/e), the 'sz' string prefix, and their 'm_'
    member-variable variants; finally normalizes acronym spelling.
    """
    # Special cases where the mechanical rules would give a bad result.
    if name == 'bOK': return 'ok'
    if name == 'iID': return 'id'
    if name == 'm_iID': return 'm_id'
    if name == 'bDelete': return 'deleteObj'  # plain 'delete' is a C++ keyword
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # The 'is not None' / length guards fix AttributeError and IndexError
    # crashes on names shorter than the pattern under test (e.g. 'm_mutex').
    if ch1 in ['p', 'i', 'l', 'b', 't', 'e'] and ch2 is not None and ch2.upper() == ch2:
        new_name = ch2.lower() + name[2:]
    elif ch1 == 's' and ch2 == 'z' and ch3 is not None and ch3.upper() == ch3:
        new_name = ch3.lower() + name[3:]
    elif ch1 == 'm' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'm_' + ch4.lower() + name[4:]
    elif ch1 == 'm' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'm_' + ch5.lower() + name[5:]
    elif ch1 == 'm' and ch2 == '_' and ch3 is not None and ch3.upper() == ch3:
        new_name = 'm_' + ch3.lower() + name[3:]
    elif name[:7] == 'm_mutex' and ln > 7 and name[7].upper() == name[7]:
        new_name = 'm_' + name[7].lower() + name[8:] + 'Mutex'
    # '<>' is Python-2-only syntax (removed in Python 3).
    if new_name is not None:
        new_name = new_name.replace('ID', 'Id')
        new_name = new_name.replace('iD', 'id')
        new_name = new_name.replace('NZB', 'Nzb')
        new_name = new_name.replace('nZB', 'nzb')
        new_name = new_name.replace('URL', 'Url')
        new_name = new_name.replace('uRL', 'url')
    return new_name
def collect_names(content):
    """Build a mapping old-name -> new-name for every renamable WORD token."""
    lex.input(content)
    # Pass 1: gather the distinct identifiers that qualify for renaming.
    seen = set()
    tok = lex.token()
    while tok is not None:
        if tok.type == 'WORD' and is_renamable(tok.value):
            seen.add(tok.value)
        tok = lex.token()
    # Pass 2: keep only those for which a replacement could be derived.
    renames = {}
    for old in seen:
        fresh = construct_new_name(old)
        if fresh:
            renames[old] = fresh
    return renames
def validate_names(names):
    """Report clashes where two different old names map to one new name.

    Prints each clashing pair and bumps the global 'dupes' counter.
    NOTE(review): with three or more old names mapping to the same new name
    the same pair is printed more than once — presumably acceptable for a
    one-off audit tool; confirm before relying on the counts.
    """
    global dupes
    unique = set()  # new names already seen while scanning the mapping
    for tok in names:
        new_name = names[tok]
        if new_name in unique:
            # A collision: find every other old name with the same target.
            for other_tok in names:
                if tok != other_tok and names[other_tok] == new_name:
                    print('%s,%s->%s' % (tok, other_tok, new_name))
                    dupes += 1
        unique.add(new_name)
def process_file(filename):
    """Check 'filename' for rename collisions (read-only; nothing is written)."""
    # 'with' guarantees the handle is closed even if an exception occurs;
    # the original left the file unclosed.
    with open(filename, 'r') as src:
        content = src.read()
    names = collect_names(content)
    validate_names(names)
# Build the lexer from the t_* rules defined above.
lex.lex()

# Single argument: a file to check, or a directory walked recursively.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as '.gitignore'
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)

# Final summary: rename collisions and lexer errors across all files.
if dupes:
    print('%s dupes(s)' % dupes)
if errors:
    print('%s error(s)' % errors)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment