Created
August 16, 2011 00:49
-
-
Save oyakata/1148226 to your computer and use it in GitHub Desktop.
正規表現: 練習帳
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*-
import re
def result(ptn, S):
    """Print S with every match of ptn in each token replaced by a comma.

    S is split on whitespace, the substitution is applied to each token on
    its own, and the rewritten tokens are printed on one line joined by
    single spaces.

    Args:
        ptn: Regular expression (string or compiled pattern) whose matches
            are replaced with ",".
        S: Whitespace-separated text to process.
    """
    # Compile once instead of handing the raw pattern to re.sub per token.
    pattern = re.compile(ptn)
    tokens = [pattern.sub(",", x) for x in S.split()]
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print(" ".join(tokens))
def main():
    """Add a comma after every three digits of numeric strings."""
    # Zero-width position with a digit on its left and, on its right, one or
    # more complete groups of three digits followed by the end of the string
    # or a non-digit character.
    thousands_gap = r"(?<=\d)(?=(\d{3})+(\Z|\D))"
    samples = (
        "1 10 100 1000 10000 100000 1000000",
        r"__2011__ \3000 \5000-",
    )
    for text in samples:
        result(thousands_gap, text)


if __name__ == "__main__":
    main()
#=> 1 10 100 1,000 10,000 100,000 1,000,000
#=> __2,011__ \3,000 \5,000-
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*-
def main(text=None):
    """Print every URL in text that does not end in ``.asp`` or ``.aspx``.

    Args:
        text: Whitespace-separated text to scan for http/https URLs. When
            omitted, a built-in sample string is used.
    """
    # Imported locally: this snippet has no module-level import of re.
    import re

    if text is None:
        text = (
            "https://ikzo.net.ga?q=ora "
            "http://foo.bar/baz.aspx "
            "http://bar.baz.foo.asp "
            "http://www.google.co.jp"
        )
    # Negative-lookahead approach. The trailing (?!\S) pins the .asp/.aspx
    # check to the end of the URL token, so a URL that only contains ".asp"
    # somewhere in the middle (e.g. "page.aspx?id=1") is still reported.
    ptn = r"https?://(?!\S+\.aspx?(?!\S))\S+"
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print(re.findall(ptn, text))


if __name__ == "__main__":
    main()
#=> ['https://ikzo.net.ga?q=ora', 'http://www.google.co.jp']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment