Skip to content

Instantly share code, notes, and snippets.

@tkf
Created March 10, 2010 08:07
Show Gist options
  • Save tkf/327665 to your computer and use it in GitHub Desktop.
Save tkf/327665 to your computer and use it in GitHub Desktop.
import re
# Matches one "{name:type}" placeholder and captures the name and the type
# name as two groups; used with findall() to list the placeholders.
FORMAT_PATTERN_GROUPED = re.compile("{([^}]+):([^}]+)}")
# Same placeholder syntax without capture groups, so that split() returns
# only the literal text found between placeholders.
FORMAT_PATTERN = re.compile("{[^}]+:[^}]+}")
def find_variables(formatstr, strlist):
    """
    Pull typed values out of one or more strings that follow a fixed format.

    - `formatstr`: format description such as '{x:int}_{y:float}.txt'.
    - `strlist`: either a single string or an iterable of strings.

    For a single string the result is a dict mapping each placeholder
    name to its converted value.  For an iterable of strings the result
    is a dict mapping each input string to such a dict.

    Usage:

    >>> result = find_variables('{x:int}_{y:float}.txt', '001_1E3.txt')
    >>> result['x']
    1
    >>> result['y']
    1000.0

    >>> fmt = 'aaa_{i:int}_bbb_{x:float}_{s:str}'
    >>> result = find_variables(fmt, 'aaa_0_bbb_1.0_spam')
    >>> (result['i'], result['x'], result['s'])
    (0, 1.0, 'spam')

    >>> sl = ['aaa_00_bbb_1_foo', 'aaa_01_bbb_1E2_bar']
    >>> result = find_variables(fmt, sl)
    >>> (result[sl[0]]['i'], result[sl[0]]['x'], result[sl[0]]['s'])
    (0, 1.0, 'foo')
    >>> (result[sl[1]]['i'], result[sl[1]]['x'], result[sl[1]]['s'])
    (1, 100.0, 'bar')

    >>> fmt = '{i:int}bbb{x:float}_{s:str}.txt'
    >>> result = find_variables(fmt, '0bbb1.0_spam.txt')
    >>> (result['i'], result['x'], result['s'])
    (0, 1.0, 'spam')
    """
    extractor = get_vfff(formatstr)
    # Anything that is not a plain string is treated as a collection of
    # strings to be processed one by one.
    if not isinstance(strlist, str):
        return extractor.extract_from_list(strlist)
    return extractor.extract(strlist)
def get_vfff(formatstr):
    """
    Build a VariablesFromFixedFormat from a format description string
    such as '{x:int}_{y:float}.txt'.
    """
    parsed_format = vfff_format(formatstr)
    return VariablesFromFixedFormat(parsed_format)
def vfff_format(formatstr):
    """
    Translate a format string such as '{x:int}_{y:float}.txt' into the
    list form expected by VariablesFromFixedFormat.

    The returned list alternates literal text (str) and
    (key, converter) tuples in the order they appear in `formatstr`;
    empty literal pieces are omitted.  Supported type names are 'int',
    'float' and 'str'.

    Raises ValueError if a placeholder names an unsupported type.

    Usage:

    >>> expected = ['A', ('x', int), '_B', ('y', float), '.txt']
    >>> vfff_format('A{x:int}_B{y:float}.txt') == expected
    True
    """
    converters = {'int': int, 'float': float, 'str': str}

    placeholders = FORMAT_PATTERN_GROUPED.findall(formatstr)
    # split() on a pattern without capture groups always returns exactly
    # one more piece than there are matches: the text before each
    # placeholder plus the text after the last one.
    literals = FORMAT_PATTERN.split(formatstr)

    formatlist = []
    for (literal, (key, typename)) in zip(literals, placeholders):
        if typename not in converters:
            raise ValueError("type '%s' is not supported" % typename)
        if literal != '':
            formatlist.append(literal)
        formatlist.append((key, converters[typename]))

    # zip() stops at the shorter sequence, so the text following the last
    # placeholder still has to be added.
    trailing = literals[-1]
    if trailing != '':
        formatlist.append(trailing)
    return formatlist
class VariablesFromFixedFormat(object):
    """
    Extract variable elements from fixed format strings.
    (e.g., 0, 1, 2 ... from 'fixed_0_str', 'fixed_1_str', 'fixed_2_str' ...)
    """

    def __init__(self, format, original='original'):
        """
        - `format`: ['str1', ('key1', int),
                     'str2', ('key2', float),
                     'str3', ('key3', str), ]
          list of string or key-callable pair tuple.
        - `original`: key under which the unmodified input string is
          stored in every extracted dict.

        Raises ValueError if `original` is also used as a key in `format`.
        """
        self.__original = original
        self.__fixedstrs = []
        self.__keypos = {}
        self.__convfunc = {}
        for elem in format:
            if isinstance(elem, str):
                self.__fixedstrs.append(elem)
            else:
                (key, func) = elem
                # A key's value is the text that follows the fixed strings
                # seen so far, i.e. slot number len(fixedstrs) in the list
                # built by extract().
                self.__keypos[key] = len(self.__fixedstrs)
                self.__convfunc[key] = func
        if original in self.__keypos:
            raise ValueError("key '%s' for original string is already "
                             "used in the format keys" % original)

    def extract(self, s):
        """
        Return a dict of converted values found in `s`, plus the input
        string itself under the `original` key.

        Each fixed string is matched at its first occurrence in the text
        that remains after the previous one, so a later field may itself
        contain a fixed string.

        Raises ValueError if a fixed string is missing from `s` or if a
        converter rejects its piece of text.

        Usage:

        >>> vfff = VariablesFromFixedFormat(['a', ('x',int), 'b', ('y',int)])
        >>> valdict = vfff.extract('a1b2')
        >>> valdict['x']
        1
        >>> valdict['y']
        2
        >>> valdict['original']
        'a1b2'
        >>> valdict = vfff.extract('a0001b0002')
        >>> valdict['x']
        1
        >>> valdict['y']
        2
        >>> valdict['original']
        'a0001b0002'
        >>> vfff = VariablesFromFixedFormat([('i',int), '_', ('s',str)])
        >>> vfff.extract('1_spam_eggs')['s']
        'spam_eggs'
        """
        values = []
        leaving = s
        for fs in self.__fixedstrs:
            # Split on the first occurrence only; a plain split() would
            # yield more than two pieces when the fixed string recurs.
            (val, found, leaving) = leaving.partition(fs)
            if not found:
                raise ValueError("fixed string '%s' not found in '%s'"
                                 % (fs, s))
            values.append(val)
        values.append(leaving)

        valdict = {self.__original: s}
        for (key, pos) in self.__keypos.items():
            valdict[key] = self.__convfunc[key](values[pos])
        return valdict

    def extract_from_list(self, strlist):
        """
        Apply extract() to every string in `strlist` and return a dict
        mapping each input string to its extracted dict.

        Usage:

        >>> vfff = VariablesFromFixedFormat(['a', ('x',int), 'b', ('y',int)])
        >>> result = vfff.extract_from_list(['a1b2', 'a3b4'])
        >>> result['a1b2']['x']
        1
        >>> result['a3b4']['y']
        4
        """
        return dict((s, self.extract(s)) for s in strlist)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment