Skip to content

Instantly share code, notes, and snippets.

@hugbug
Last active November 1, 2015 20:50
Show Gist options
  • Save hugbug/102bae8f586264dde61f to your computer and use it in GitHub Desktop.
nzbget/nzbget#103: Source code naming convention: Renaming tool
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
import lex
# Running total of lexer errors across all processed files.
errors = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'
# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''
# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    # Newlines produce no token; only the line counter advances.
    t.lexer.lineno += len(t.value)
# Error handling rule
def t_error(t):
    # Report the offending character, count it, and resynchronize the
    # lexer by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
# Acronyms whose presence marks an identifier as a rename candidate.
acronyms = ('NZB', 'URL', 'ID', 'TLS', 'NNTP', 'IP')

def is_renamable(token):
    """Return True if the identifier contains any known acronym."""
    # Idiomatic substring test replaces 'token.find(ac) > -1'.
    return any(ac in token for ac in acronyms)
def construct_new_name(name, acronyms=('NZB', 'URL', 'ID', 'TLS', 'NNTP', 'IP')):
    """Return the camel-cased replacement for name, or None if unchanged.

    All-uppercase identifiers (constants/macros) are left alone.  Each
    embedded acronym such as 'NZB' becomes 'Nzb'.  The acronym list is a
    parameter (defaulting to the module-level set) so callers can extend it.
    """
    if name == name.upper():
        return None
    new_name = name
    for ac in acronyms:
        if ac in new_name:
            new_name = new_name.replace(ac, ac[0] + ac[1:].lower())
    # '!=' replaces the Python-2-only '<>' operator (syntax error in Py3).
    return new_name if name != new_name else None
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def perform_rename(content, names):
    """Return content with every mapped WORD token replaced."""
    # Collect untouched segments and replacements, then join once —
    # same result as splicing the string per token.
    pieces = []
    copied_upto = 0
    lex.input(content)
    for tok in iter(lex.token, None):
        if tok.type != 'WORD':
            continue
        replacement = names.get(tok.value)
        if replacement:
            pieces.append(content[copied_upto:tok.lexpos])
            pieces.append(replacement)
            copied_upto = tok.lexpos + len(tok.value)
    pieces.append(content[copied_upto:])
    return ''.join(pieces)
def process_file(filename):
    """Rename all renamable identifiers in one source file in place."""
    # with-statements close the handles deterministically; the original
    # leaked them to the garbage collector.  Binary mode keeps line
    # endings untouched (Python 2 str == bytes).
    with open(filename, 'rb') as src:
        content = src.read()
    names = collect_names(content)
    #print(names)
    new_content = perform_rename(content, names)
    #print(new_content)
    with open(filename, 'wb') as dst:
        dst.write(new_content)
# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to rename in place, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as .gitignore
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
# Using PLY library by David M. Beazley (Dabeaz LLC)
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py"
# into the directory where this script is located.
import lex
# Running total of lexer errors across all processed files.
errors = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

# Error handling rule
def t_error(t):
    # Report, count, and resynchronize by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """Only global variables (prefix 'g_') are rename candidates."""
    return token[:2] == "g_"
def construct_new_name(name):
    """Return the new name for a 'g_' global, or None when no rule matches.

    'g_pFoo' / 'g_iFoo' / 'g_lFoo' / 'g_bFoo' / 'g_tFoo' / 'g_eFoo'
    (Hungarian single-letter prefix) -> 'g_Foo'; 'g_szFoo' -> 'g_Foo'.
    """
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # 'is not None' guards fix crashes on short names such as 'g_p' or
    # 'g_sz', where the original called None.upper().
    if ch1 == 'g' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] \
            and ch4 is not None and ch4.upper() == ch4:
        new_name = 'g_' + ch4.upper() + name[4:]
    elif ch1 == 'g' and ch2 == '_' and ch3 == 's' and ch4 == 'z' \
            and ch5 is not None and ch5.upper() == ch5:
        new_name = 'g_' + ch5.upper() + name[5:]
    return new_name
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def perform_rename(content, names):
    """Return content with every mapped WORD token replaced."""
    # Collect untouched segments and replacements, then join once —
    # same result as splicing the string per token.
    pieces = []
    copied_upto = 0
    lex.input(content)
    for tok in iter(lex.token, None):
        if tok.type != 'WORD':
            continue
        replacement = names.get(tok.value)
        if replacement:
            pieces.append(content[copied_upto:tok.lexpos])
            pieces.append(replacement)
            copied_upto = tok.lexpos + len(tok.value)
    pieces.append(content[copied_upto:])
    return ''.join(pieces)
def process_file(filename):
    """Process one file: apply renames when the global want_rename is
    set, otherwise only print the planned rename list."""
    # with-statements close the handles deterministically; the original
    # leaked them to the garbage collector.
    with open(filename, 'rb') as src:
        content = src.read()
    names = collect_names(content)
    if want_rename:
        new_content = perform_rename(content, names)
        with open(filename, 'wb') as dst:
            dst.write(new_content)
    else:
        for r in names:
            print('%s;%s' % (r, names[r]))
        print('------------')
        print('%s renamables' % len(names))
# Set to False to only print the planned renames without editing files.
want_rename = True

# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to process, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as .gitignore
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
# Using PLY library by David M. Beazley (Dabeaz LLC)
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py"
# into the directory where this script is located.
import lex
# Running total of lexer errors across all processed files.
errors = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Compute column.
# input is the input text string
# token is a token instance
def find_column(input, token):
    """Return the 1-based column of token within input.

    The original returned a 0-based value for tokens on the first line
    (no preceding newline) but a 1-based value everywhere else; this
    version is consistently 1-based, matching the PLY documentation.
    """
    line_start = input.rfind('\n', 0, token.lexpos) + 1
    return (token.lexpos - line_start) + 1
# Error handling rule
def t_error(t):
    # Report the offending character, count it, and resynchronize the
    # lexer by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """A candidate has at least one upper-case letter and does not start
    with an upper-case character."""
    if token == token.lower():
        # all-lower-case identifiers already follow the convention
        return False
    # class names etc. start upper-case and are left alone
    return token[0] != token[0].upper()
def construct_new_name(name):
    """Map a Hungarian-notation identifier to its new name, or None.

    Special cases first, then prefix rules:
      bOK -> ok, iID -> id, m_iID -> m_id, bDelete -> deleteObj,
      pFoo/iFoo/lFoo/bFoo/tFoo/eFoo -> foo, szFoo -> foo,
      m_pFoo -> m_foo, m_szFoo -> m_foo, m_Foo -> m_foo,
      m_mutexFoo -> m_fooMutex.
    Acronym casing (ID/NZB/URL) is normalized afterwards.
    """
    if name == 'bOK': return 'ok'
    if name == 'iID': return 'id'
    if name == 'm_iID': return 'm_id'
    if name == 'bDelete': return 'deleteObj'
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # 'is not None' / length guards fix crashes on short names such as
    # 'i', 'm_' or 'm_mutex' where the original called None.upper() or
    # indexed past the end of the string.
    if ch1 in ['p', 'i', 'l', 'b', 't', 'e'] and ch2 is not None and ch2.upper() == ch2:
        new_name = ch2.lower() + name[2:]
    elif ch1 == 's' and ch2 == 'z' and ch3 is not None and ch3.upper() == ch3:
        new_name = ch3.lower() + name[3:]
    elif ch1 == 'm' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'm_' + ch4.lower() + name[4:]
    elif ch1 == 'm' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'm_' + ch5.lower() + name[5:]
    elif ch1 == 'm' and ch2 == '_' and ch3 is not None and ch3.upper() == ch3:
        new_name = 'm_' + ch3.lower() + name[3:]
    elif name[:7] == 'm_mutex' and ln > 7 and name[7].upper() == name[7]:
        new_name = 'm_' + name[7].lower() + name[8:] + 'Mutex'
    # 'is not None' replaces the Python-2-only '<> None' comparison.
    if new_name is not None:
        new_name = new_name.replace('ID', 'Id')
        new_name = new_name.replace('iD', 'id')
        new_name = new_name.replace('NZB', 'Nzb')
        new_name = new_name.replace('nZB', 'nzb')
        new_name = new_name.replace('URL', 'Url')
        new_name = new_name.replace('uRL', 'url')
    return new_name
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def perform_rename(content, names):
    """Return content with every mapped WORD token replaced."""
    # Collect untouched segments and replacements, then join once —
    # same result as splicing the string per token.
    pieces = []
    copied_upto = 0
    lex.input(content)
    for tok in iter(lex.token, None):
        if tok.type != 'WORD':
            continue
        replacement = names.get(tok.value)
        if replacement:
            pieces.append(content[copied_upto:tok.lexpos])
            pieces.append(replacement)
            copied_upto = tok.lexpos + len(tok.value)
    pieces.append(content[copied_upto:])
    return ''.join(pieces)
def process_file(filename):
    """Process one file: apply renames when the global want_rename is
    set, otherwise only print the planned rename list."""
    # with-statements close the handles deterministically; the original
    # leaked them to the garbage collector.  Text mode ('r'/'w') is kept
    # as in the original revision.
    with open(filename, 'r') as src:
        content = src.read()
    names = collect_names(content)
    if want_rename:
        new_content = perform_rename(content, names)
        with open(filename, 'w') as dst:
            dst.write(new_content)
    else:
        for r in names:
            print('%s;%s' % (r, names[r]))
        print('------------')
        print('%s renamables' % len(names))
# Set to False to only print the planned renames without editing files.
want_rename = True

# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to process, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # NOTE(review): unlike the other revisions in this file, hidden
            # ('.'-prefixed) files are NOT skipped here — confirm intended.
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary of lexer errors encountered across all files.
if errors:
    print('%s error(s)' % errors)
#!/usr/bin/env python
#
# Identifier renaming tool to refactor nzbget source code.
#
# Copyright (C) 2015 Andrey Prygunkov <hugbug@users.sourceforge.net>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import os
# Using PLY library by David M. Beazley (Dabeaz LLC)
# Get a copy from http://www.dabeaz.com/ply/, extract it and put file "lex.py"
# into the directory where this script is located.
import lex
# Running total of lexer errors across all processed files.
errors = 0
# Number of distinct old names that collide on the same new name.
dupes = 0

# Token names used by the PLY lexer.
tokens = (
    'DIRECTIVE',
    'WORD',
    'NUMBER',
    'STRING',
    'CHAR')

# Preprocessor directive
t_DIRECTIVE = r'\#.*'
# C/C++ identifier ('$' is accepted as an extension by some compilers)
t_WORD = r'[a-zA-Z_$][0-9a-zA-Z_$]*'
# Hex or decimal integer literal
t_NUMBER = r'0x[0-9a-fA-F]+|\d+'
# Literals, one character represents one token
literals = '[]<>{}():;=.,-+/?*!|&~^%\\'
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'

# C or C++ comment (ignore)
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(//.*)'
    # The docstring above is the PLY match pattern; returning no token
    # discards the comment.  Keep lineno in sync for /* ... */ blocks.
    t.lexer.lineno += t.value.count('\n')
    pass

# C string
t_STRING = r'\"([^\\\n]|(\\.))*?\"'
# C character literal
t_CHAR = r'\'([^\\\n]|(\\.))*?\''

# Define a rule so we can track line numbers
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
# Compute column.
# input is the input text string
# token is a token instance
def find_column(input, token):
    """Return the 1-based column of token within input.

    The original returned a 0-based value for tokens on the first line
    (no preceding newline) but a 1-based value everywhere else; this
    version is consistently 1-based, matching the PLY documentation.
    """
    line_start = input.rfind('\n', 0, token.lexpos) + 1
    return (token.lexpos - line_start) + 1
# Error handling rule
def t_error(t):
    # Report the offending character, count it, and resynchronize the
    # lexer by skipping one character.
    global errors
    print("Illegal character '%s'" % t.value[0])
    errors += 1
    t.lexer.skip(1)
    #sys.exit(1)
def is_renamable(token):
    """A candidate has at least one upper-case letter and does not start
    with an upper-case character."""
    if token == token.lower():
        # all-lower-case identifiers already follow the convention
        return False
    # class names etc. start upper-case and are left alone
    return token[0] != token[0].upper()
def construct_new_name(name):
    """Map a Hungarian-notation identifier to its new name, or None.

    Special cases first, then prefix rules:
      bOK -> ok, iID -> id, m_iID -> m_id, bDelete -> deleteObj,
      pFoo/iFoo/lFoo/bFoo/tFoo/eFoo -> foo, szFoo -> foo,
      m_pFoo -> m_foo, m_szFoo -> m_foo, m_Foo -> m_foo,
      m_mutexFoo -> m_fooMutex.
    Acronym casing (ID/NZB/URL) is normalized afterwards.
    """
    if name == 'bOK': return 'ok'
    if name == 'iID': return 'id'
    if name == 'm_iID': return 'm_id'
    if name == 'bDelete': return 'deleteObj'
    ln = len(name)
    ch1 = name[0]
    ch2 = name[1] if ln > 1 else None
    ch3 = name[2] if ln > 2 else None
    ch4 = name[3] if ln > 3 else None
    ch5 = name[4] if ln > 4 else None
    new_name = None
    # 'is not None' / length guards fix crashes on short names such as
    # 'i', 'm_' or 'm_mutex' where the original called None.upper() or
    # indexed past the end of the string.
    if ch1 in ['p', 'i', 'l', 'b', 't', 'e'] and ch2 is not None and ch2.upper() == ch2:
        new_name = ch2.lower() + name[2:]
    elif ch1 == 's' and ch2 == 'z' and ch3 is not None and ch3.upper() == ch3:
        new_name = ch3.lower() + name[3:]
    elif ch1 == 'm' and ch2 == '_' and ch3 in ['p', 'i', 'l', 'b', 't', 'e'] and ch4 is not None and ch4.upper() == ch4:
        new_name = 'm_' + ch4.lower() + name[4:]
    elif ch1 == 'm' and ch2 == '_' and ch3 == 's' and ch4 == 'z' and ch5 is not None and ch5.upper() == ch5:
        new_name = 'm_' + ch5.lower() + name[5:]
    elif ch1 == 'm' and ch2 == '_' and ch3 is not None and ch3.upper() == ch3:
        new_name = 'm_' + ch3.lower() + name[3:]
    elif name[:7] == 'm_mutex' and ln > 7 and name[7].upper() == name[7]:
        new_name = 'm_' + name[7].lower() + name[8:] + 'Mutex'
    # 'is not None' replaces the Python-2-only '<> None' comparison.
    if new_name is not None:
        new_name = new_name.replace('ID', 'Id')
        new_name = new_name.replace('iD', 'id')
        new_name = new_name.replace('NZB', 'Nzb')
        new_name = new_name.replace('nZB', 'nzb')
        new_name = new_name.replace('URL', 'Url')
        new_name = new_name.replace('uRL', 'url')
    return new_name
def collect_names(content):
    """Tokenize content and build an old-name -> new-name mapping."""
    lex.input(content)
    # Gather every distinct WORD token that qualifies for renaming.
    words = set(tok.value for tok in iter(lex.token, None)
                if tok.type == 'WORD' and is_renamable(tok.value))
    mapping = {}
    for old in words:
        replacement = construct_new_name(old)
        if replacement:
            mapping[old] = replacement
    return mapping
def validate_names(names):
    """Report collisions where several old names map to one new name.

    Adds the number of extra old names per collision group to the global
    'dupes' counter (same total as the original).  The original rescanned
    the whole dict for every duplicate (O(n^2)) and printed each
    colliding pair twice; this groups once and reports each group once.
    """
    global dupes
    groups = {}
    for old in names:
        groups.setdefault(names[old], []).append(old)
    for new_name in groups:
        group = groups[new_name]
        if len(group) > 1:
            print('%s->%s' % (','.join(sorted(group)), new_name))
            dupes += len(group) - 1
def process_file(filename):
    """Collect rename candidates from one file and check for collisions
    (dry-run revision: the file is never modified)."""
    # with-statement closes the handle deterministically; the original
    # leaked it to the garbage collector.
    with open(filename, 'r') as src:
        content = src.read()
    names = collect_names(content)
    validate_names(names)
# Build the lexer from the rules above.
lex.lex()

# Single argument: a file to process, or a directory tree to walk.
fname = sys.argv[1]
if os.path.isfile(fname):
    process_file(fname)
elif os.path.isdir(fname):
    for root, dirs, files in os.walk(fname):
        for filename in files:
            # skip hidden files such as .gitignore
            if filename[0] == '.': continue
            path = os.path.join(root, filename)
            print(path)
            sys.stdout.flush()
            process_file(path)
# Summary: colliding rename targets and lexer errors encountered.
if dupes:
    print('%s dupes(s)' % dupes)
if errors:
    print('%s error(s)' % errors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment