# mobilestack/similar_files_parser

## similar_files_parser
"""
Build a clean, de-duplicated word list from a raw bad-words file.

Run as a script, this reads ``badwords.txt`` (one entry per line) and writes
the normalized words to ``badwords2.txt``, one word per line.

badwords source: https://github.com/shutterstock/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/blob/master/en
badwords source 2: http://urbanoalvarez.es/blog/2008/04/04/bad-words-list/
"""


def normalize_words(lines):
    """Return the normalized, de-duplicated words contained in *lines*.

    Each entry is processed as follows:

    1. leading and trailing whitespace is stripped;
    2. inner spaces and hyphens are removed, so multi-word entries collapse
       into a single token;
    3. entries that are not purely alphabetic (digits, punctuation, or
       nothing left at all) are dropped;
    4. the remaining entries are lower-cased.

    Duplicates are removed and the first-seen order is preserved.

    Args:
        lines: Any iterable of strings, e.g. an open text file or a list.

    Returns:
        A list of unique lower-case alphabetic words.
    """
    seen = set()
    words = []
    for raw in lines:
        candidate = raw.strip().replace(" ", "").replace("-", "")
        # Build a new list instead of calling ``list.remove`` on the list
        # being iterated: removing during iteration skips the element that
        # follows each removal, so some non-alphabetic entries would survive.
        if not candidate.isalpha():
            continue
        word = candidate.lower()
        # The set gives O(1) duplicate checks; the list keeps the order.
        if word not in seen:
            seen.add(word)
            words.append(word)
    return words


def main(source_path="badwords.txt", target_path="badwords2.txt"):
    """Read *source_path*, normalize its entries and write *target_path*.

    Args:
        source_path: Text file with one raw entry per line.
        target_path: Output file; it is overwritten and receives one
            normalized word per line.
    """
    # ``with`` closes both files even if reading or writing fails.
    with open(source_path) as source:
        words = normalize_words(source)

    with open(target_path, "w") as target:
        target.writelines("%s\n" % word for word in words)


if __name__ == "__main__":
    main()