Created
March 10, 2010 08:07
-
-
Save tkf/327665 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Matches one '{key:type}' placeholder and captures the key and the type
# name as two separate groups (consumed via ``findall`` in vfff_format).
FORMAT_PATTERN_GROUPED = re.compile("{([^}]+):([^}]+)}")
# Same placeholder syntax but without capture groups, so that ``split``
# returns only the literal text surrounding the placeholders.
FORMAT_PATTERN = re.compile("{[^}]+:[^}]+}")
def find_variables(formatstr, strlist):
    """
    Pull typed values out of one string, or several, that follow a
    fixed format such as '{x:int}_{y:float}.txt'.

    - `formatstr`: format description made of literal text and
      '{key:type}' placeholders (type is 'int', 'float' or 'str').
    - `strlist`: either a single string or an iterable of strings.

    For a single string the result is a dict mapping each key to its
    converted value.  For an iterable the result is a dict mapping each
    input string to such a per-string dict.

    Usage:

    >>> result = find_variables('{x:int}_{y:float}.txt', '001_1E3.txt')
    >>> (result['x'], result['y'])
    (1, 1000.0)

    >>> format = 'aaa_{i:int}_bbb_{x:float}_{s:str}'
    >>> sl = ['aaa_00_bbb_1_foo', 'aaa_01_bbb_1E2_bar']
    >>> result = find_variables(format, sl)
    >>> (result[sl[0]]['i'], result[sl[0]]['x'], result[sl[0]]['s'])
    (0, 1.0, 'foo')
    >>> (result[sl[1]]['i'], result[sl[1]]['x'], result[sl[1]]['s'])
    (1, 100.0, 'bar')

    >>> format = '{i:int}bbb{x:float}_{s:str}.txt'
    >>> result = find_variables(format, '0bbb1.0_spam.txt')
    >>> (result['i'], result['x'], result['s'])
    (0, 1.0, 'spam')
    """
    extractor = get_vfff(formatstr)
    # Anything that is not a plain string is treated as a collection of
    # strings to be processed one by one.
    if not isinstance(strlist, str):
        return extractor.extract_from_list(strlist)
    return extractor.extract(strlist)
def get_vfff(formatstr):
    """
    Build a VariablesFromFixedFormat instance from a format string
    such as '{x:int}_{y:float}.txt'.

    The string is first translated by vfff_format into the list form
    that the VariablesFromFixedFormat constructor accepts.
    """
    translated = vfff_format(formatstr)
    return VariablesFromFixedFormat(translated)
def vfff_format(formatstr):
    """
    Convert a format string such as 'A{x:int}_B{y:float}.txt' into the
    list form taken by VariablesFromFixedFormat.

    The result alternates literal text and (key, converter) tuples in
    the order they appear; empty literal pieces are left out.  For the
    example above the result is::

        ['A', ('x', int), '_B', ('y', float), '.txt']

    Raises ValueError when a placeholder names a type other than
    'int', 'float' or 'str'.
    """
    converters = {'int': int, 'float': float, 'str': str}

    placeholders = FORMAT_PATTERN_GROUPED.findall(formatstr)
    literals = FORMAT_PATTERN.split(formatstr)

    pieces = []
    for literal, (key, typename) in zip(literals, placeholders):
        if typename not in converters:
            raise ValueError("type '%s' is not supported" % typename)
        if literal:
            pieces.append(literal)
        pieces.append((key, converters[typename]))

    # Splitting normally leaves exactly one more literal than there are
    # placeholders; that last literal is the text after the final one.
    surplus = len(literals) - len(placeholders)
    if surplus == 1:
        trailing = literals[-1]
        if trailing:
            pieces.append(trailing)
    elif surplus != 0:
        raise ValueError("Unexpected: "
                         "len(list_fixedstr) = %d, "
                         "len(list_key_type) = %d" %
                         (len(literals), len(placeholders)))
    return pieces
class VariablesFromFixedFormat(object):
    """
    Extract variable elements from fixed format strings.
    (e.g., 0, 1, 2 ... from 'fixed_0_str', 'fixed_1_str', 'fixed_2_str' ...)

    The format is a sequence of fixed strings and (key, converter)
    pairs.  Each key is associated with the slot between two
    consecutive fixed strings (or before the first / after the last
    one); on extraction the text found in that slot is passed to the
    key's converter.

    Note: keys that are not separated by a fixed string share the same
    slot and therefore receive the same text.
    """

    def __init__(self, format, original='original'):
        """
        - `format`: ['str1', ('key1', int),
                     'str2', ('key2', float),
                     'str3', ('key3', str), ]
          list of string or key-callable pair tuple.
        - `original`: key under which the unmodified input string is
          stored in every extracted dict.

        Raises ValueError if `original` is also used as a format key.
        """
        self.__original = original
        self.__fixedstrs = []
        self.__keypos = {}
        self.__convfunc = {}
        # `pos` counts the fixed strings seen so far, which is also the
        # index of the slot that the next key refers to.
        pos = 0
        for elem in format:
            if isinstance(elem, str):
                self.__fixedstrs.append(elem)
                pos += 1
            else:
                (key, func) = elem
                self.__keypos[key] = pos
                self.__convfunc[key] = func
        if original in self.__keypos:
            raise ValueError("key '%s' for original string is already "
                             "used in the format keys" % original)

    def extract(self, s):
        """
        Return a dict with the converted value of every key found in
        `s`, plus the original string under the `original` key.

        Fixed strings are located left to right, each at its first
        occurrence in the not-yet-consumed part of `s`, so a fixed
        string may appear again inside a later variable part.

        Raises ValueError if a fixed string cannot be found in `s`
        (a converter may also raise on text it cannot convert).

        Usage:

        >>> vfff = VariablesFromFixedFormat(['a', ('x',int), 'b', ('y',int)])
        >>> valdict = vfff.extract('a1b2')
        >>> valdict['x']
        1
        >>> valdict['y']
        2
        >>> valdict['original']
        'a1b2'

        >>> valdict = vfff.extract('a0001b0002')
        >>> valdict['x']
        1
        >>> valdict['y']
        2
        >>> valdict['original']
        'a0001b0002'
        """
        values = []
        remaining = s
        for fixed in self.__fixedstrs:
            # partition() cuts at the first occurrence only, so later
            # repetitions of `fixed` stay inside the remaining text.
            (head, sep, remaining) = remaining.partition(fixed)
            if not sep:
                raise ValueError("fixed string %r not found in %r"
                                 % (fixed, s))
            values.append(head)
        values.append(remaining)

        valdict = {self.__original: s}
        # items() works on both Python 2 and Python 3.
        for (key, pos) in self.__keypos.items():
            valdict[key] = self.__convfunc[key](values[pos])
        return valdict

    def extract_from_list(self, strlist):
        """
        Apply `extract` to every string in `strlist` and return a dict
        that maps each input string to its extracted dict.

        Usage:

        >>> vfff = VariablesFromFixedFormat(['a', ('x',int), 'b', ('y',int)])
        >>> result = vfff.extract_from_list(['a1b2', 'a3b4'])
        >>> result['a1b2']['x']
        1
        >>> result['a3b4']['y']
        4
        """
        return dict((s, self.extract(s)) for s in strlist)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment