JokerQyou/pattern_split.py

## pattern_split.py
# coding: utf-8
from __future__ import print_function

import re

# Character class of everything treated as a separator. The trailing ``+``
# collapses a run of consecutive separators (e.g. a line break followed by a
# space) into a single split point, so no empty pieces appear between them.
# '@', ASCII letters and ASCII digits are deliberately left out of the class
# so that mentions such as ``@tester5`` survive as one piece.
pattern = u''.join([
    u'[',
    u'\u0000-\u001f',  # C0 controls (tab, line feed, carriage return, ...)
    u'\u0020-\u002f',  # < General Latin characters, exclude @, letters and numbers
    u'\u003A-\u003f',
    u'\u005b-\u0060',
    u'\u007b-\u007f',  # > See http://jrgraphix.net/r/Unicode/0020-007F for details
    u'\u2000-\u206f',  # General Punctuation
    u'\u2e00-\u2e7f',  # Supplemental Punctuation
    u'\u3000-\u303f',  # CJK Symbols and Punctuation
    # NOTE: this block also holds fullwidth letters and digits, so those act
    # as separators as well.
    u'\uff00-\uffef',  # Halfwidth and Fullwidth Forms
    u'\ufff0-\uffff',  # Specials
    u']+',
])

# The above form explains how the pattern works, but you can use this equivalent too
# u'[\x00-/:-?[-`{-\x7f\u2000-\u206f\u2e00-\u2e7f\u3000-\u303f\uff00-\uffef\ufff0-\uffff]+'

# Sample input: @-mentions separated by ASCII and fullwidth punctuation, a
# space, a line break followed by a space, and a bare line break.
a = u'''@测试员1，关注一下这个。@测试员2？@测试员3!@测试员4,@tester5:你好 @其他人
 @更多人，换行带了空格
@还有谁？换行不带空格'''

# Print every piece on its own line. A plain loop is used because printing is
# a side effect; a list comprehension would only build a throwaway list.
# Text that starts or ends with a separator still yields an empty first/last
# piece, which is the normal behaviour of re.split.
for piece in re.split(pattern, a):
    print(piece)
	# coding: utf-8
	from __future__ import print_function

	import re

	# Character class of separator characters; the trailing ``+`` makes a run
	# of consecutive separators count as a single split point.
	# NOTE: the class starts at U+0020, so control characters such as line
	# feed and tab are not separators here.
	pattern = u''.join([
	u'[',
	u'\u0020-\u002f', # < General Latin characters, exclude @, letters and numbers
	u'\u003A-\u003f',
	u'\u005b-\u0060',
	u'\u007b-\u007f', # > See http://jrgraphix.net/r/Unicode/0020-007F for details
	u'\u2000-\u206f', # General Punctuation
	u'\u2e00-\u2e7f', # Supplemental Punctuation
	u'\u3000-\u303f', # CJK Symbols and Punctuation
	u'\uff00-\uffef', # Halfwidth and Fullwidth Forms
	u'\ufff0-\uffff', # Specials
	u']+',
	])

	# The above form explains how the pattern works, but you can use this equivalent too
	# u'[ -/:-?[-`{-\x7f\u2000-\u206f\u2e00-\u2e7f\u3000-\u303f\uff00-\uffef\ufff0-\uffff]+'

	# Sample text containing @-mentions separated by ASCII and fullwidth
	# punctuation, a space, and line breaks.
	a = u'''@测试员1，关注一下这个。@测试员2？@测试员3!@测试员4,@tester5:你好 @其他人
	@更多人，换行带了空格
	@还有谁？换行不带空格'''

	# Split the sample on the separator pattern and print each piece on its
	# own line (the comprehension is used only for the print side effect).
	[print(i) for i in re.split(pattern, a)]