Created
October 3, 2011 18:15
-
-
Save lsauer/1259821 to your computer and use it in GitHub Desktop.
RegEx - matching numbers only in select regions (JS RegEx vs Python PCRE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#author: lo sauer 2011 - lsauer.com | |
#demonstrating the difference in RegEx engines | |
Python: | |
import re | |
a="123 test 42 <!-- comment 345 commentEnd --> 678 test2 348" | |
filter(None, re.findall('(?:\<\!--.+\d+.+--\>)|(\d+)',a)) | |
#With RegEx negative lookaheads; for demonstration only - not very practical (slow!) | |
re.findall('(\d+)(?!(?:[^<]+|<(?!!--))*-->)',a)) | |
>>>['123', '42', '678', '2', '348'] | |
JS: | |
#JS has no lookbehinds; with the global flag set, match() returns every whole match and ignores capture groups, so the text matched by (?:..) still appears in the result | |
a="123 test 42 <!-- comment 345 commentEnd --> 678 test2 348" | |
a.match(/(?:\<\!--.+\d+.+--\>)|(\d+)/g) | |
>>>["123", "42", "<!-- comment 345 commentEnd -->", "678", "2", "348"] | |
a.match(/(?=\<\!--.+\d+.+--\>)|(\d+)/g) | |
>>>["123", "42", "", "345", "678", "2", "348"] | |
a.match(/(?=\<\!--.+\d+.+--\>)|(\d+)/g).filter(Number) | |
>>>["123", "42", "345", "678", "2", "348"] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment