Skip to content

Instantly share code, notes, and snippets.

@hugbug
Last active November 1, 2015 20:50
Show Gist options
  • Save hugbug/102bae8f586264dde61f to your computer and use it in GitHub Desktop.
nzbget/nzbget#103: Source code naming convention: Renaming tool
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
import lex
# Running total of lexer errors across all processed files.
errors = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'
# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''
# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    # Newlines produce no token; only the line counter advances.
    t.lexer.lineno += len(t.value)
# Error handling rule
def t_error(t):
    # Report the offending character, count it, and resynchronize the
    # lexer by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
# Acronyms whose presence marks an identifier as a rename candidate.
acronyms = ('NZB', 'URL', 'ID', 'TLS', 'NNTP', 'IP')

def is_renamable(token):
    """Return True if the identifier contains any known acronym."""
    # Idiomatic substring test replaces 'token.find(ac) > -1'.
    return any(ac in token for ac in acronyms)
def construct_new_name(name, acronyms=('NZB', 'URL', 'ID', 'TLS', 'NNTP', 'IP')):
    """Return the camel-cased replacement for name, or None if unchanged.

    All-uppercase identifiers (constants/macros) are left alone.  Each
    embedded acronym such as 'NZB' becomes 'Nzb'.  The acronym list is a
    parameter (defaulting to the module-level set) so callers can extend it.
    """
    if name == name.upper():
        return None
    new_name = name
    for ac in acronyms:
        if ac in new_name:
            new_name = new_name.replace(ac, ac[0] + ac[1:].lower())
    # '!=' replaces the Python-2-only '<>' operator (syntax error in Py3).
    return new_name if name != new_name else None
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def perform_rename(content, names):
    """Return content with every mapped WORD token replaced."""
    # Collect untouched segments and replacements, then join once —
    # same result as splicing the string per token.
    pieces = []
    copied_upto = 0
    lex.input(content)
    for tok in iter(lex.token, None):
        if tok.type != 'WORD':
            continue
        replacement = names.get(tok.value)
        if replacement:
            pieces.append(content[copied_upto:tok.lexpos])
            pieces.append(replacement)
            copied_upto = tok.lexpos + len(tok.value)
    pieces.append(content[copied_upto:])
    return ''.join(pieces)
def process_file(filename):
    """Rename all renamable identifiers in one source file in place."""
    # with-statements close the handles deterministically; the original
    # leaked them to the garbage collector.  Binary mode keeps line
    # endings untouched (Python 2 str == bytes).
    with open(filename, 'rb') as src:
        content = src.read()
    names = collect_names(content)
    #print(names)
    new_content = perform_rename(content, names)
    #print(new_content)
    with open(filename, 'wb') as dst:
        dst.write(new_content)
# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to rename in place, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as .gitignore
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
# Using PLY library by David M. Beazley (Dabeaz LLC)
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py"
# into the directory where this script is located.
import lex
# Running total of lexer errors across all processed files.
errors = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

# Error handling rule
def t_error(t):
    # Report, count, and resynchronize by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """Only global variables (prefix 'g_') are rename candidates."""
    return token[:2] == "g_"
def construct_new_name(name):
    """Return the new name for a 'g_' global, or None when no rule matches.

    'g_pFoo' / 'g_iFoo' / 'g_lFoo' / 'g_bFoo' / 'g_tFoo' / 'g_eFoo'
    (Hungarian single-letter prefix) -> 'g_Foo'; 'g_szFoo' -> 'g_Foo'.
    """
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # 'is not None' guards fix crashes on short names such as 'g_p' or
    # 'g_sz', where the original called None.upper().
    if ch1 == 'g' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] \
            and ch4 is not None and ch4.upper() == ch4:
        new_name = 'g_' + ch4.upper() + name[4:]
    elif ch1 == 'g' and ch2 == '_' and ch3 == 's' and ch4 == 'z' \
            and ch5 is not None and ch5.upper() == ch5:
        new_name = 'g_' + ch5.upper() + name[5:]
    return new_name
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def perform_rename(content, names):
    """Return content with every mapped WORD token replaced."""
    # Collect untouched segments and replacements, then join once —
    # same result as splicing the string per token.
    pieces = []
    copied_upto = 0
    lex.input(content)
    for tok in iter(lex.token, None):
        if tok.type != 'WORD':
            continue
        replacement = names.get(tok.value)
        if replacement:
            pieces.append(content[copied_upto:tok.lexpos])
            pieces.append(replacement)
            copied_upto = tok.lexpos + len(tok.value)
    pieces.append(content[copied_upto:])
    return ''.join(pieces)
def process_file(filename):
    """Process one file: apply renames when the global want_rename is
    set, otherwise only print the planned rename list."""
    # with-statements close the handles deterministically; the original
    # leaked them to the garbage collector.
    with open(filename, 'rb') as src:
        content = src.read()
    names = collect_names(content)
    if want_rename:
        new_content = perform_rename(content, names)
        with open(filename, 'wb') as dst:
            dst.write(new_content)
    else:
        for r in names:
            print('%s;%s' % (r, names[r]))
        print('------------')
        print('%s renamables' % len(names))
# Set to False to only print the planned renames without editing files.
want_rename = True

# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to process, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as .gitignore
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
# Using PLY library by David M. Beazley (Dabeaz LLC)
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py"
# into the directory where this script is located.
import lex
# Running total of lexer errors across all processed files.
errors = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Compute column.
# input is the input text string
# token is a token instance
def find_column(input, token):
    """Return the 1-based column of token within input.

    The original returned a 0-based value for tokens on the first line
    (no preceding newline) but a 1-based value everywhere else; this
    version is consistently 1-based, matching the PLY documentation.
    """
    line_start = input.rfind('\n', 0, token.lexpos) + 1
    return (token.lexpos - line_start) + 1
# Error handling rule
def t_error(t):
    # Report the offending character, count it, and resynchronize the
    # lexer by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """A candidate has at least one upper-case letter and does not start
    with an upper-case character."""
    if token == token.lower():
        # all-lower-case identifiers already follow the convention
        return False
    # class names etc. start upper-case and are left alone
    return token[0] != token[0].upper()
def construct_new_name(name):
    """Map a Hungarian-notation identifier to its new name, or None.

    Special cases first, then prefix rules:
      bOK -> ok, iID -> id, m_iID -> m_id, bDelete -> deleteObj,
      pFoo/iFoo/lFoo/bFoo/tFoo/eFoo -> foo, szFoo -> foo,
      m_pFoo -> m_foo, m_szFoo -> m_foo, m_Foo -> m_foo,
      m_mutexFoo -> m_fooMutex.
    Acronym casing (ID/NZB/URL) is normalized afterwards.
    """
    if name == 'bOK': return 'ok'
    if name == 'iID': return 'id'
    if name == 'm_iID': return 'm_id'
    if name == 'bDelete': return 'deleteObj'
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # 'is not None' / length guards fix crashes on short names such as
    # 'i', 'm_' or 'm_mutex' where the original called None.upper() or
    # indexed past the end of the string.
    if ch1 in ['p', 'i', 'l', 'b', 't', 'e'] and ch2 is not None and ch2.upper() == ch2:
        new_name = ch2.lower() + name[2:]
    elif ch1 == 's' and ch2 == 'z' and ch3 is not None and ch3.upper() == ch3:
        new_name = ch3.lower() + name[3:]
    elif ch1 == 'm' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'm_' + ch4.lower() + name[4:]
    elif ch1 == 'm' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'm_' + ch5.lower() + name[5:]
    elif ch1 == 'm' and ch2 == '_' and ch3 is not None and ch3.upper() == ch3:
        new_name = 'm_' + ch3.lower() + name[3:]
    elif name[:7] == 'm_mutex' and ln > 7 and name[7].upper() == name[7]:
        new_name = 'm_' + name[7].lower() + name[8:] + 'Mutex'
    # 'is not None' replaces the Python-2-only '<> None' comparison.
    if new_name is not None:
        new_name = new_name.replace('ID', 'Id')
        new_name = new_name.replace('iD', 'id')
        new_name = new_name.replace('NZB', 'Nzb')
        new_name = new_name.replace('nZB', 'nzb')
        new_name = new_name.replace('URL', 'Url')
        new_name = new_name.replace('uRL', 'url')
    return new_name
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def perform_rename(content, names):
    """Return content with every mapped WORD token replaced."""
    # Collect untouched segments and replacements, then join once —
    # same result as splicing the string per token.
    pieces = []
    copied_upto = 0
    lex.input(content)
    for tok in iter(lex.token, None):
        if tok.type != 'WORD':
            continue
        replacement = names.get(tok.value)
        if replacement:
            pieces.append(content[copied_upto:tok.lexpos])
            pieces.append(replacement)
            copied_upto = tok.lexpos + len(tok.value)
    pieces.append(content[copied_upto:])
    return ''.join(pieces)
def process_file(filename):
    """Process one file: apply renames when the global want_rename is
    set, otherwise only print the planned rename list."""
    # with-statements close the handles deterministically; the original
    # leaked them to the garbage collector.  Text mode ('r'/'w') is kept
    # as in the original revision.
    with open(filename, 'r') as src:
        content = src.read()
    names = collect_names(content)
    if want_rename:
        new_content = perform_rename(content, names)
        with open(filename, 'w') as dst:
            dst.write(new_content)
    else:
        for r in names:
            print('%s;%s' % (r, names[r]))
        print('------------')
        print('%s renamables' % len(names))
# Set to False to only print the planned renames without editing files.
want_rename = True

# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to process, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # NOTE(review): unlike the other revisions in this file, hidden
            # ('.'-prefixed) files are NOT skipped here — confirm intended.
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
# Using PLY library by David M. Beazley (Dabeaz LLC)
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py"
# into the directory where this script is located.
import lex
# Running total of lexer errors across all processed files.
errors = 0
# Number of distinct old names that collide on the same new name.
dupes = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Compute column.
# input is the input text string
# token is a token instance
def find_column(input, token):
    """Return the 1-based column of token within input.

    The original returned a 0-based value for tokens on the first line
    (no preceding newline) but a 1-based value everywhere else; this
    version is consistently 1-based, matching the PLY documentation.
    """
    line_start = input.rfind('\n', 0, token.lexpos) + 1
    return (token.lexpos - line_start) + 1
# Error handling rule
def t_error(t):
    # Report the offending character, count it, and resynchronize the
    # lexer by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """A candidate has at least one upper-case letter and does not start
    with an upper-case character."""
    if token == token.lower():
        # all-lower-case identifiers already follow the convention
        return False
    # class names etc. start upper-case and are left alone
    return token[0] != token[0].upper()
def construct_new_name(name):
    """Map a Hungarian-notation identifier to its new name, or None.

    Special cases first, then prefix rules:
      bOK -> ok, iID -> id, m_iID -> m_id, bDelete -> deleteObj,
      pFoo/iFoo/lFoo/bFoo/tFoo/eFoo -> foo, szFoo -> foo,
      m_pFoo -> m_foo, m_szFoo -> m_foo, m_Foo -> m_foo,
      m_mutexFoo -> m_fooMutex.
    Acronym casing (ID/NZB/URL) is normalized afterwards.
    """
    if name == 'bOK': return 'ok'
    if name == 'iID': return 'id'
    if name == 'm_iID': return 'm_id'
    if name == 'bDelete': return 'deleteObj'
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # 'is not None' / length guards fix crashes on short names such as
    # 'i', 'm_' or 'm_mutex' where the original called None.upper() or
    # indexed past the end of the string.
    if ch1 in ['p', 'i', 'l', 'b', 't', 'e'] and ch2 is not None and ch2.upper() == ch2:
        new_name = ch2.lower() + name[2:]
    elif ch1 == 's' and ch2 == 'z' and ch3 is not None and ch3.upper() == ch3:
        new_name = ch3.lower() + name[3:]
    elif ch1 == 'm' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'm_' + ch4.lower() + name[4:]
    elif ch1 == 'm' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'm_' + ch5.lower() + name[5:]
    elif ch1 == 'm' and ch2 == '_' and ch3 is not None and ch3.upper() == ch3:
        new_name = 'm_' + ch3.lower() + name[3:]
    elif name[:7] == 'm_mutex' and ln > 7 and name[7].upper() == name[7]:
        new_name = 'm_' + name[7].lower() + name[8:] + 'Mutex'
    # 'is not None' replaces the Python-2-only '<> None' comparison.
    if new_name is not None:
        new_name = new_name.replace('ID', 'Id')
        new_name = new_name.replace('iD', 'id')
        new_name = new_name.replace('NZB', 'Nzb')
        new_name = new_name.replace('nZB', 'nzb')
        new_name = new_name.replace('URL', 'Url')
        new_name = new_name.replace('uRL', 'url')
    return new_name
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def validate_names(names):
    """Report collisions where several old names map to one new name.

    Adds the number of extra old names per collision group to the global
    'dupes' counter (same total as the original).  The original rescanned
    the whole dict for every duplicate (O(n^2)) and printed each
    colliding pair twice; this groups once and reports each group once.
    """
    global dupes
    groups = {}
    for old in names:
        groups.setdefault(names[old], []).append(old)
    for new_name in groups:
        group = groups[new_name]
        if len(group) > 1:
            print('%s->%s' % (','.join(sorted(group)), new_name))
            dupes += len(group) - 1
def process_file(filename):
    """Collect rename candidates from one file and check for collisions
    (dry-run revision: the file is never modified)."""
    # with-statement closes the handle deterministically; the original
    # leaked it to the garbage collector.
    with open(filename, 'r') as src:
        content = src.read()
    names = collect_names(content)
    validate_names(names)
# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to process, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as .gitignore
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary: colliding rename targets and lexer errors encountered.
if dupes:
    print('%s dupes(s)' % dupes)
if errors:
    print('%s error(s)' % errors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment