Skip to content

Instantly share code, notes, and snippets.

@valq7711
Created June 16, 2016 20:38
Show Gist options
  • Save valq7711/fe0ecefe7f6eb60fc737a47cd03c6e05 to your computer and use it in GitHub Desktop.
Save valq7711/fe0ecefe7f6eb60fc737a47cd03c6e05 to your computer and use it in GitHub Desktop.
def txt_split(s):
    """Tokenize *s* on spaces, grouping ``<...>`` markers with adjacent words.

    A marker is any substring matching ``<[a-z0-9 ]+>`` (lowercase letters,
    digits and spaces between angle brackets).  Text outside markers is
    split on runs of one or more spaces.

    The result is a list whose items are either:

    * a plain ``str`` -- a word that touches no marker, or
    * a ``list`` of ``str`` -- one or more markers together with the words
      written directly against them (no space in between), in source order.

    Leading/trailing spaces in *s* (and an empty *s*) produce ``''`` items,
    because the underlying ``re.split`` yields empty edge fragments.

    >>> txt_split('a <x>b c')
    ['a', ['<x>', 'b'], 'c']
    >>> txt_split('a<x> b')
    [['a', '<x>'], 'b']
    >>> txt_split('a <x> b')
    ['a', ['<x>'], 'b']
    """
    import re

    marker_pattern = '<[a-z0-9 ]+>'
    # The pattern has no capturing group, so re.split returns exactly
    # len(markers) + 1 text fragments: the text before the first marker,
    # then the text following each marker.
    fragments = re.split(marker_pattern, s)
    markers = re.findall(marker_pattern, s)

    ret = list(re.split(' +', fragments[0]))

    for marker, following in zip(markers, fragments[1:]):
        if ret[-1] == '':
            # The preceding text ended with a space (or was empty): the
            # empty placeholder becomes a fresh group opened by the marker.
            ret[-1] = [marker]
        else:
            # The marker is glued to the previous token; promote that token
            # to a group if it is still a bare word, then attach the marker.
            if not isinstance(ret[-1], list):
                ret[-1] = [ret[-1]]
            ret[-1].append(marker)

        words = re.split(' +', following)
        # re.split always returns at least one item.  A non-empty first item
        # means a word directly follows the marker, so it joins the group;
        # an empty one just signals a space (or end of input) and is dropped.
        first = words.pop(0)
        if first != '':
            ret[-1].append(first)
        ret.extend(words)

    return ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment