Skip to content

Instantly share code, notes, and snippets.

@moreati
Last active October 14, 2021 19:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moreati/f1167727291401af96becd76c8b50418 to your computer and use it in GitHub Desktop.
Save moreati/f1167727291401af96becd76c8b50418 to your computer and use it in GitHub Desktop.
Trying regular expressions purported to match double quoted strings with backslash escapes
#!/usr/bin/env python3
"""
Trying regular expressions purported to match double quoted strings with backslash escapes
Syntax changes made vs sources
- Changed capture groups to non-capture groups () -> (?:)
- Removed escaping of double quotes \" -> "
"""
import re
# Candidate regular expressions for matching a double-quoted string that may
# contain backslash escapes. Each entry is a 3-tuple:
#   (expression, URL the expression was taken from, label string or None)
CANDIDATES = [
    # https://stackoverflow.com/questions/249791/regex-for-quoted-string-with-escaping-quotes
    (r'"(?:\\.|[^\"])*"', 'https://stackoverflow.com/a/1016356', None),
    (r'"(?:[^"\\]*(?:\\.[^"\\]*)*)"', 'https://stackoverflow.com/a/10786066', None),
    # https://stackoverflow.com/questions/5695240/php-regex-to-ignore-escaped-quotes-within-quotes
    (r'"(?:[^"\\]|\\.)*"', 'https://stackoverflow.com/a/5696141', 'good'),
    # NOTE(review): the lookahead (?=...) is zero-width, so this candidate
    # looks unable to consume ordinary (non-escape) characters, e.g. "abc" --
    # confirm against the cited answer.
    (r'"(?:(?=[^"\\]+)|\\.)*"', 'https://stackoverflow.com/a/5696141', 'better'),
    (r'"[^"\\]*(?:\\.[^"\\]*)*"', 'https://stackoverflow.com/a/5696141', 'best'),
    # https://github.com/lark-parser/lark
    # _STRING_INNER : /.*?/
    # _STRING_ESC_INNER : _STRING_INNER /(?<!\\)(\\\\)*?/
    # ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
    (r'".*?(?<!\\)(?:\\\\)*?"', 'https://github.com/lark-parser/lark/blob/ebb15f75721a161feb293c4dcdc613c04539d9eb/lark/grammars/common.lark', None),
]
if __name__ == '__main__':
    # Sample text containing four double-quoted strings: an empty string,
    # an escaped quote, an escaped backslash, and a backslash followed by "n".
    sample = r' "", "\"", "\\", "\n" '
    # Entries in CANDIDATES carry extra fields after the expression; only the
    # expression is needed here, so the remaining fields are discarded.
    for i, (expr, *_) in enumerate(CANDIDATES):
        try:
            # DOTALL lets "." also match a newline character.
            pattern = re.compile(expr, re.DOTALL)
        except re.error:
            # Show which candidate failed to compile, then propagate the error.
            print(i, expr)
            raise
        print(i, pattern.findall(sample))
@moreati
Copy link
Author

moreati commented Oct 13, 2021

$ ./escaped_string.py 
0 ['""', '"\\""', '"\\\\"', '"\\n"']
1 ['""', '"\\""', '"\\\\"', '"\\n"']
2 ['""', '"\\""', '"\\\\"', '"\\n"']
3 ['""', '"\\""', '"\\\\"', '"\\n"']
4 ['""', '"\\""', '"\\\\"', '"\\n"']
5 ['""', '"\\""', '"\\\\"', '"\\n"']

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment