Skip to content

Instantly share code, notes, and snippets.

@hanxiaomax hanxiaomax/match.py

Last active Aug 29, 2015
Embed
What would you like to do?
正则表达式基础学习笔记
#coding:utf-8
import re
# ---------------------------
# Compile the regular expression first; the main benefit is that the
# resulting pattern object can be reused.
pattern = re.compile(r'.ello')
# match() tries to apply the pattern at the beginning of the string.
match = pattern.match('hello world!')
if match:
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("-------------")
    print(match.group())
# --------------------------
# Same kind of match through the module-level helper, without compiling
# the pattern explicitly.
m = re.match(r'hello', 'hello world!')
print(m.group())
# Walk through the attributes and methods of a match object.
m = re.match(r'(\w+) (\w+)(?P<sign>.*)', 'hello world!')
report = [
    # ----- attributes of the match object -----
    # The text the match was run against.
    ("m.string:", m.string),
    # The pattern object that produced this match.
    ("m.re:", m.re),
    # Index in the text at which the regex started searching.
    ("m.pos:", m.pos),
    # Index in the text at which the regex stopped searching.
    ("m.endpos:", m.endpos),
    # Number (group index) of the last capturing group that matched --
    # this is a group number, not an offset into the text.
    ("m.lastindex:", m.lastindex),
    # Name of the last capturing group that matched.
    ("m.lastgroup:", m.lastgroup),
    # ----- methods of the match object -----
    # Substring captured by one or more groups; several arguments yield
    # a tuple.
    ("m.group(1,2):", m.group(1, 2)),
    # Tuple of the substrings captured by all groups.
    ("m.groups():", m.groups()),
    # Dict mapping each named group to its captured substring; unnamed
    # groups are not included.
    ("m.groupdict():", m.groupdict()),
    # Start index in the text of the substring captured by the group.
    ("m.start(2):", m.start(2)),
    # End index in the text of the substring captured by the group.
    ("m.end(2):", m.end(2)),
    # (start(group), end(group)) as a tuple.
    ("m.span(2):", m.span(2)),
    # Substitute the captured groups into the template and return it.
    (r"m.expand(r'\2 \1\3'):", m.expand(r'\2 \1\3')),
]
for label, value in report:
    # One formatted argument keeps the output identical on Python 2 and 3
    # ("label value", separated by a single space).
    print("%s %s" % (label, value))
#coding:utf-8
import re
# Each row pairs a pattern with the subject string it is demonstrated on.
# Keeping both in one table prevents the two lists from drifting out of
# step (the original lost a comma after "abc\Z", which silently merged two
# patterns and shifted every later pairing).
# All patterns are raw strings so that "\b" stays a word boundary (not a
# backspace character) and "\1" stays a back-reference (not chr(1)).
cases = [
    # ----- character matching -----
    (r"a....c", "ab%#1c"),          # "." matches any character except \n
    (r"a.\..c", "a1..c"),           # "\." is an escaped, literal dot
    (r"a[b-z][^c][0-9]", "aqw8"),   # character classes; "^" negates the class
    # ----- quantifiers -----
    (r"abc*vb*", "abccv"),          # "*": previous character 0 or more times
    (r"abc+", "ab"),                # "+": previous character 1 or more times
    (r"ab?c?", "ab"),               # "?": previous character 0 or 1 time
    (r"ab{2}", "abb"),              # "{2}": previous character exactly twice
    (r"ab{2,4}", "abbb"),           # "{2,4}": 2 to 4 times; either bound may be omitted
    # ----- anchors: consume no characters of the subject -----
    (r"^abc", "cbc"),               # start of string: must begin with "abc"
    (r"abc$", "abd"),               # end of string: must end with "abc"
    (r"\Aabc", "abd"),              # "\A": start of string only
    (r"abc\Z", "11c"),              # "\Z": end of string only
    (r"a\b!bc", "a!bc"),            # "\b": word boundary
    (r"a\Bbc", "abc"),              # "\B": not a word boundary
    # ----- alternation and grouping -----
    (r"abc|def", "def"),            # "|": left side is tried first; applies to the
                                    # whole expression unless limited by parentheses
    (r"a(av){2}(12|45)5", "aavav455"),      # groups with a quantifier / alternation
    (r"(?P<id>abc){2}", "abcabc"),          # (?P<id>...) is a named group
    (r"(\d)abc\1", "8abc8"),                # "\1" refers back to group 1
    (r"(?P<id>\d)abc(?P=id)", "1abc1"),     # (?P=id) refers back to the named group
]
# The original module-level names are kept, derived from the paired table.
re_list = [case[0] for case in cases]
str_list = [case[1] for case in cases]
for regex, subject in zip(re_list, str_list):
    m = re.search(regex, subject)
    # Print "pattern<---->subject<TAB>matched text", or "None" when the
    # pattern does not match the subject.
    print(regex + "<---->" + subject + "\t" + (m.group() if m is not None else "None"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.