jeffjohnson9046/replace-8bit-chars-ascii.py

## replace-8bit-chars-ascii.py
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import io

"""
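Replace selected non-ASCII characters in input.txt with ASCII substitutes and
write the result to output.txt.

The input file might look something like this: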

cat input.txt
some ñ thing
foo ñññ

When the script is done, the output.txt will look like this:

cat output.txt
some x thing
foo xxx

There are other libraries that will do this automagically (e.g. unidecode), but in my case I wanted control over what gets
mapped to what.
"""
# Translation table in the form unicode.translate() expects: each key is a
# code point (int) and each value is the text substituted for that character.
# Entries are written as (character, replacement) pairs so the table stays
# readable; the comprehension converts each character to its code point.
replacement_map = {
    ord(original): substitute
    for original, substitute in [
        (u'ñ', u'x'),
        # ... other mappings here...
    ]
}

# Stream input.txt (decoded as UTF-8) into output.txt line by line, applying
# replacement_map to every line on the way through.
# NOTE: the output is opened with the 'ascii' codec and the default error
# handling, so a non-ASCII character with no entry in replacement_map is
# expected to raise UnicodeEncodeError rather than be written silently.
with io.open('input.txt', encoding='utf-8') as source, \
        io.open('output.txt', mode='w', encoding='ascii') as sink:
    sink.writelines(line.translate(replacement_map) for line in source)