Last active
March 21, 2019 18:56
-
-
Save r00tten/840b1932609ea6960dafde7cf2c41f23 to your computer and use it in GitHub Desktop.
Asciify Unicode strings within files. I didn't have time to test it properly; if you find a bug, just tell me.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# [X] Reading from file
# [X] Splitting word by word
# [X] Split ASCII and Unicode
# [X] Determine a range and apply filter
# [X] Collect all the unique ones in an array
# [X] Create random English words for them
# [X] Create an updated file
# [ ] Work with folder and file as an input
#!/usr/bin/python | |
# encoding: utf-8 | |
import sys | |
import urllib2 | |
from random import randint | |
# The path of the file to process must be given as the first argument.
if len(sys.argv) < 2:
    print("Please enter file to asciify!")
    # sys.exit() instead of the bare exit() helper: exit() is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    # A non-zero status marks the missing argument as an error.
    sys.exit(1)
def checkUnique(string, arrayUni):
    """Return the index of the first entry of `arrayUni` equal to `string`.

    Args:
        string: the token to look up.
        arrayUni: list of previously collected tokens.

    Returns:
        The position of the first matching entry, or -1 when there is no
        match. An empty `string` never matches and always yields -1.
    """
    # Keep the historical contract: the empty string is never reported as
    # found, even if the list happens to contain an empty entry.
    if not string:
        return -1
    for i, uni in enumerate(arrayUni):
        # Value equality, not identity: `is` on ints/characters only works
        # when the interpreter happens to share the underlying objects.
        if uni == string:
            return i
    return -1
def addToArrays(arrayUni, word, arrayRand, words):
    """Register `word` together with a randomly chosen replacement.

    Appends `word` to `arrayUni` and a random element of `words` to
    `arrayRand`, so both lists stay aligned index by index. Nothing is
    appended when `word` is the empty string.

    Args:
        arrayUni: list collecting the original tokens (mutated in place).
        word: token to register.
        arrayRand: list collecting the replacements (mutated in place).
        words: non-empty sequence to draw the replacement from.

    Raises:
        ValueError: from `randint` when `words` is empty.
    """
    # Compare by value; `is not ""` tests object identity with a literal.
    if word != "":
        arrayUni.append(word)
        arrayRand.append(words[randint(0, len(words) - 1)])
import os.path

# Download a word list to draw replacement names from.
# https://stackoverflow.com/questions/18834636/random-word-generator-python
word_site = "http://svnweb.freebsd.org/csrg/share/dict/words?view=co&content-type=text/plain"
response = urllib2.urlopen(word_site)
try:
    txt = response.read()
finally:
    # Release the connection even if reading fails.
    response.close()
WORDS = txt.splitlines()

arrayUni = []   # non-ASCII tokens seen so far
arrayRand = []  # arrayRand[k] is the replacement word for arrayUni[k]

# Characters that end a non-ASCII run inside a word: '.', '(', ')' and ';'
# (character codes 46, 40, 41 and 59).
DELIMITERS = ".();"


def _substitute(line, token):
    """Return `line` with every occurrence of `token` replaced by "rep_<word>".

    The replacement word is looked up in `arrayRand`; a token that has not
    been seen before is first registered through `addToArrays`.
    An empty token leaves the line untouched.
    """
    if not token:
        return line
    check = checkUnique(token, arrayUni)
    if check == -1:
        addToArrays(arrayUni, token, arrayRand, WORDS)
        # The replacement just appended is the last entry.
        check = len(arrayRand) - 1
    return line.replace(token, "rep_" + arrayRand[check])


# Write the result next to the input as "rep_<name>". Prefixing only the base
# name keeps this working when the argument contains a directory part.
src_path = sys.argv[1]
dst_path = os.path.join(os.path.dirname(src_path),
                        "rep_" + os.path.basename(src_path))

# Both files are managed by `with`, so they are closed even on error.
with open(src_path, 'r') as f, open(dst_path, 'w') as out:
    for line in f:
        for word in line.split():
            start = -1  # index where the current non-ASCII run began, or -1
            for i, char in enumerate(word):
                if ord(char) > 127:
                    # First non-ASCII character opens a run.
                    if start == -1:
                        start = i
                elif char in DELIMITERS and start != -1:
                    # A delimiter closes the run; the delimiter is excluded.
                    line = _substitute(line, word[start:i])
                    start = -1
            if start != -1:
                # The run reaches the end of the word. Slice to the end so the
                # final character is kept (slicing up to the last index
                # dropped it).
                line = _substitute(line, word[start:])
        out.write(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment