Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert dumb quotes to smart quotes in Python
#!/usr/bin/env python3
"""
Inspired by: https://gist.github.com/davidtheclark/5521432
Converts dumb quotes to smart quotes, -- to an em dash, and ... to an ellipsis.
"""
import sys
import re
import os
class bcolors:
    """ANSI escape sequences for styling terminal output.

    Each attribute is a plain string meant to be concatenated around the
    text being printed; ``ENDC`` resets whatever styling came before it.
    """

    HEADER = '\033[95m'     # SGR 95: bright magenta foreground
    OKBLUE = '\033[94m'     # SGR 94: bright blue foreground
    OKGREEN = '\033[92m'    # SGR 92: bright green foreground
    WARNING = '\033[93m'    # SGR 93: bright yellow foreground
    FAIL = '\033[91m'       # SGR 91: bright red foreground
    ENDC = '\033[0m'        # SGR 0: reset all attributes
    BOLD = '\033[1m'        # SGR 1: bold / increased intensity
    UNDERLINE = '\033[4m'   # SGR 4: underline
# Unicode punctuation that the ASCII stand-ins are converted to.
EN_DASH = "\u2013"
EM_DASH = "\u2014"
ELLIPSIS = "\u2026"
L_SQUOTE = "\u2018"
R_SQUOTE = "\u2019"
L_DQUOTE = "\u201C"
R_DQUOTE = "\u201D"

# Body of a regex character class: a straight quote that directly follows
# one of these characters is taken to be a *closing* quote. ELLIPSIS is
# listed because "..." has already been replaced by the time quotes are
# examined, so it must count as punctuation just like "." does.
_CLOSING_CONTEXT = "a-zA-Z0-9.,?!;:'\"" + ELLIPSIS


def error(msg):
    """Print ``ERROR: <msg>`` to stderr, wrapped in ``bcolors.FAIL``, and exit.

    The process terminates with exit status 1; this function does not return.
    """
    print(bcolors.FAIL + "ERROR: {}".format(msg) + bcolors.ENDC, file=sys.stderr)
    sys.exit(1)


def exists(filen):
    """Return True if the filesystem path ``filen`` exists."""
    return os.path.exists(filen)


def dumb_to_smart_quotes(string):
    """Return ``string`` with ASCII punctuation replaced by typographic forms.

    ``--`` becomes an em dash, ``...`` becomes an ellipsis character, and
    straight single/double quotes become curly quotes. A quote is treated
    as closing when it directly follows a letter, digit or punctuation
    mark, and as opening otherwise. Quotes that delimit an HTML attribute
    value (i.e. that follow ``=``) are restored to straight quotes.
    """
    # Dashes and ellipses first; they are plain substring replacements.
    string = string.replace('--', EM_DASH)
    string = string.replace('...', ELLIPSIS)

    # Double quotes: closing ones are identified by what precedes them,
    # every quote still straight after that is an opening one.
    string = re.sub('([' + _CLOSING_CONTEXT + '])"', r'\1' + R_DQUOTE, string)
    string = string.replace('"', L_DQUOTE)
    # Undo the conversion for HTML attribute values such as href="...".
    string = re.sub(r'={}(.*?){}'.format(L_DQUOTE, R_DQUOTE), r'="\1"', string)

    # Single quotes follow the same process. Double quotes are already
    # curly at this point, so a single quote that closes right after a
    # closing double quote must accept R_DQUOTE as preceding context.
    string = re.sub(
        "([" + _CLOSING_CONTEXT + R_DQUOTE + "])'", r'\1' + R_SQUOTE, string
    )
    string = string.replace("'", L_SQUOTE)
    string = re.sub(r'={}(.*?){}'.format(L_SQUOTE, R_SQUOTE), r"='\1'", string)
    return string
if __name__ == '__main__':
    # Command-line entry point: read the input file, convert its
    # punctuation, and write the result to a new output file.
    if len(sys.argv) < 3:
        print("Usage: python3 smartquote.py filename.txt smart.txt")
        sys.exit(1)

    filein = sys.argv[1]
    fileout = sys.argv[2]

    # Refuse anything that could overwrite existing data: the output must
    # be a different path from the input and must not exist yet.
    if filein == fileout:
        error("Input file cannot be the same as output file")
    if not exists(filein):
        error("File {} does not exist.".format(filein))
    if exists(fileout):
        error("File {} already exists.".format(fileout))

    with open(filein, 'r', encoding='utf-8') as f:
        contents = f.read()
    result = dumb_to_smart_quotes(contents)

    with open(fileout, 'w', encoding='utf-8') as f:
        f.write(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.