Skip to content

Instantly share code, notes, and snippets.

@ubershmekel
Created May 26, 2013 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ubershmekel/5653329 to your computer and use it in GitHub Desktop.
Save ubershmekel/5653329 to your computer and use it in GitHub Desktop.
Censor out ASCII art; made for Candy Box.
import pprint
import os
from collections import Counter
# Characters treated as ASCII-art markers: ascii_prob() scores a line by the
# share of these characters, and censor_line() replaces each of them with '#'.
# Contents: pipe, backslash, slash, underscore, double quote, space, both
# parentheses and the letter 'n'.
ascii_markers = '|\\/_" ()n'
def ascii_prob(line):
    """Return the share of marker characters in the stripped line.

    The line is stripped of surrounding whitespace first. The result is the
    number of characters that appear in ``ascii_markers`` divided by the
    stripped length, as a float.

    Returns the integer 0 when the stripped line is empty, or when it
    contains any of the words 'break', 'return', 'else' or 'case'
    (presumably to keep ordinary source-code lines from being scored as
    art -- confirm against the input files).
    """
    stripped = line.strip()
    if not stripped:
        return 0

    if any(word in stripped for word in ('break', 'return', 'else', 'case')):
        return 0

    # Counter yields 0 for characters that never occur, so no membership
    # test is needed before summing.
    tally = Counter(stripped)
    marker_hits = sum(tally[ch] for ch in ascii_markers)
    return float(marker_hits) / len(stripped)
def censor_line(line):
    """Return ``line`` with its marker characters blanked out if it scores as art.

    A line whose ``ascii_prob`` score is below 0.7 is returned unchanged.
    Otherwise every character found in ``ascii_markers`` is replaced by '#';
    all other characters (including the trailing newline) are kept.
    """
    if ascii_prob(line) >= 0.7:
        # '#' is not itself a marker, so a single per-character pass gives the
        # same result as replacing each marker in turn.
        return ''.join('#' if ch in ascii_markers else ch for ch in line)
    return line
def main():
    """Write a censored copy of the 'cbox123' tree into 'cbox123_censored'.

    Walks every file under the source directory, mirrors the directory
    layout under the destination, and writes each line through
    ``censor_line``. Each file name is printed as it is processed.

    While copying, every (ascii_prob score, line) pair is collected; after
    the walk a fixed slice of the score-sorted list is pretty-printed as a
    debugging aid for inspecting lines around the censoring threshold.
    """
    src_dir = 'cbox123'
    dst_dir = src_dir + '_censored'
    probs = []
    for root, _dirs, files in os.walk(src_dir):
        # os.walk roots always start with src_dir, so swap only that leading
        # prefix. A blanket str.replace would also rewrite any nested
        # directory or file name that happens to contain 'cbox123'.
        tdir = dst_dir + root[len(src_dir):]
        if not os.path.exists(tdir):
            os.makedirs(tdir)
        for fname in files:
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(fname)
            # Context managers guarantee both handles are closed (and the
            # output flushed) even if a line fails to process.
            with open(os.path.join(root, fname)) as src:
                with open(os.path.join(tdir, fname), 'w') as out:
                    for line in src:
                        probs.append((ascii_prob(line), line))
                        out.write(censor_line(line))
    # Debug output: an empty list is printed when fewer than ~2000 lines
    # were collected.
    pprint.pprint(sorted(probs)[-2500:-2000])
# Run the censoring pass only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment