Skip to content

Instantly share code, notes, and snippets.

@martinthenext
Last active April 28, 2016 15:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martinthenext/fc989ffa6ec84ee09962 to your computer and use it in GitHub Desktop.
Save martinthenext/fc989ffa6ec84ee09962 to your computer and use it in GitHub Desktop.
String transformation with one example
"""
This is a simple example of string transformer that you can teach by one example.
>>> transformer = StringTransformer()
>>> transformer.train_by_example('Joel, 1949, piano', '{name: \'Joel\', instrument: \'piano\'}')
>>> print transformer.transform('Lennon, 1940, guitar')
{name: 'Lennon', instrument: 'guitar'}
"""
import re
from itertools import izip
# We disassemble strings into 'groups' of letters/numbers
def get_groups(string):
return [x for x, y, z in re.findall(r'((\d+)|([a-zA-Z]+))', string)]
# Assume the groups of the output examples are 'transformations' of input groups
# Some transformations
def transform_equality(before):
return before
def transform_upper_case(before):
return before.upper()
def transform_lower_case(before):
return before.lower()
def transform_reverse_string(before):
return before[::-1]
# Rank transformations by what we can be more sure in
transformations = [
transform_reverse_string,
transform_equality,
transform_upper_case,
transform_lower_case,
]
class StringTransformer(object):
"""
>>> transformer = StringTransformer()
>>> transformer.train_by_example('2015-12-07T09:15:17-05:00AA', '07/12/2015 at 09h15aa')
>>> transformer.transform('1998-11-05T08:15:21-05:00BB')
'05/11/1998 at 08h15bb'
"""
def __init__(self):
# We store the transformation method we learned
self.transform_from_pairs = None
self.template_after = None
def train_by_example(self, before, after):
# Let us identify groups of numeric or alphabetic characters
groups_before = get_groups(before)
groups_after = get_groups(after)
# Save this example for simplicity
self.groups_before = groups_before
self.groups_after = groups_after
# We need to be able to assemble an instance of 'after' from this list
# Substitute groups with placeholders
if '%s' in after:
raise ValueError('We cannot work with strings that contain \'%s\'')
template_after = after
for group in groups_after:
template_after = template_after.replace(group, "%s", 1)
self.template_after = template_after
# For every after group we find from what transformation and index it comes from
transform_from_pairs = [ None ] * len(groups_after)
for t in transformations:
for after_group_idx, after_group in enumerate(groups_after):
# Do we already have a source for this group?
if transform_from_pairs[after_group_idx] is not None:
continue
groups_before_transformed = [t(g) for g in groups_before]
if after_group in groups_before_transformed:
# In how many places the after_group could be found?
indexes = [i for i, g in enumerate(groups_before_transformed)
if g == after_group ]
if len(indexes) > 1:
# Can't figure out where exactly the group comes from
pass
else:
index_before, = indexes
transform_from_pairs[after_group_idx] = (t, index_before)
self.transform_from_pairs = transform_from_pairs
def transform(self, before_new):
groups_before_new = get_groups(before_new)
if len(groups_before_new) != len(self.groups_before):
raise ValueError('New string differs from an example in pattern')
# By default we take the known example as a transformation
groups_after_new = self.groups_after
# Then we use the known transformations
for idx, t_from_idx in enumerate(self.transform_from_pairs):
if t_from_idx is not None:
t, from_idx = t_from_idx
groups_after_new[idx] = t(groups_before_new[from_idx])
return self.template_after % tuple(groups_after_new)
print 'Welcome to string transformer. Please provide a tranformation example'
before = raw_input('Before: ')
after = raw_input('After: ')
transformer = StringTransformer()
transformer.train_by_example(before, after)
print 'Thank you. Strings are now transformed automatically:'
print 'Press Enter to quit'
while(True):
to_transform = raw_input('-> ')
if to_transform:
print transformer.transform(to_transform)
else:
quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment