Skip to content

Instantly share code, notes, and snippets.

@timothyclemans
Created November 21, 2010 23:38
Show Gist options
  • Save timothyclemans/709291 to your computer and use it in GitHub Desktop.
Save timothyclemans/709291 to your computer and use it in GitHub Desktop.
code for computational search
# Sample query string; nothing else visible in this file reads this variable.
query = 'ALS agencies in King County'
def a_vs_b(a, b):
    """Placeholder handler taking two values; not implemented, returns None."""
    return None
def agencies_in_location(type, location):
    """Placeholder handler taking a type and a location; not implemented, returns None."""
    return None
# Maps a query format string to the function registered for it; filled by add_format().
query_formats_to_functions = {}
# Maps the tuple of literal (non-placeholder) text pieces of a format back to
# that format string; filled by add_format(), read by find_matching_format().
non_values_to_query_formats = {} # given tuple of the non values return query format
def extract_non_values_from_format(format):
    """Return the literal text found between consecutive placeholders.

    A placeholder is written ``{% name %}``.  Only text lying *between* two
    placeholders is reported; text before the first placeholder or after the
    last one is ignored.  Whitespace around each literal piece is stripped.

    Args:
        format: Query format string, e.g. ``'{% a %} vs. {% b %}'``.

    Returns:
        A list of literal strings in order of appearance.  It holds one entry
        fewer than the number of well-formed placeholders, and is empty when
        the format has fewer than two of them.
    """
    # Record each placeholder as (index of '{%', index just past its '%}').
    placeholder_spans = []
    cursor = 0
    while True:
        opens_at = format.find('{%', cursor)
        if opens_at == -1:
            break
        closes_at = format.find('%}', opens_at + 2)
        if closes_at == -1:
            # Unterminated placeholder: nothing well-formed remains.
            break
        # Resume searching *after* this '%}' so the next iteration cannot
        # find the same closing marker again.
        cursor = closes_at + 2
        placeholder_spans.append((opens_at, cursor))

    # The literal pieces are whatever sits between one placeholder's end and
    # the next placeholder's start.
    return [
        format[previous_end:next_start].strip()
        for (_, previous_end), (next_start, _)
        in zip(placeholder_spans, placeholder_spans[1:])
    ]
def add_format(format, function):
    """Register ``function`` as the handler for the query format ``format``.

    Two module-level registries are updated: the format string is mapped to
    its handler, and the tuple of the format's literal text pieces is mapped
    back to the format string.
    """
    query_formats_to_functions[format] = function
    literal_pieces = tuple(extract_non_values_from_format(format))
    non_values_to_query_formats[literal_pieces] = format
# Register the known query formats together with their handler functions.
add_format('{% a %} vs. {% b %}', a_vs_b)
add_format('{% agency_type %} agencies in {% location %}', agencies_in_location)
def find_matching_format(query):
    """Return the first registered format whose literal text all occurs in ``query``.

    Matching is a plain substring test for every literal piece of a format.
    A format registered with no literal pieces (an empty tuple) therefore
    matches any query.

    Args:
        query: The query string to classify.

    Returns:
        The matching format string from ``non_values_to_query_formats``.

    Raises:
        LookupError: If no registered format matches.
    """
    for non_values_tuple, query_format in non_values_to_query_formats.items():
        if all(non_value in query for non_value in non_values_tuple):
            return query_format
    # Call form of raise: valid on both Python 2 and Python 3.
    raise LookupError('No matching format found')
def extract_values_from_query(format, query):
    """Extract the placeholder values of ``format`` from ``query``.

    The literal pieces of ``format`` are located in ``query`` from left to
    right, and the text before, between and after them is returned as the
    values, with surrounding whitespace stripped.

    Args:
        format: Query format string, e.g. ``'{% a %} vs. {% b %}'``.
        query: Query text, e.g. ``'Boston vs. Seattle'``.

    Returns:
        A list of value strings in placeholder order, e.g.
        ``['Boston', 'Seattle']``.  An empty list is returned when nothing
        can be extracted: the format has no literal text between
        placeholders, a literal piece is missing from ``query``, or any
        value would be empty.
    """
    non_values = extract_non_values_from_format(format)
    if not non_values:
        # No literal text to anchor on, so the values cannot be delimited.
        return []

    values = []
    cursor = 0
    for non_value in non_values:
        # Search after the previous literal so repeated or overlapping
        # literals are matched in order.
        found_at = query.find(non_value, cursor)
        if found_at == -1:
            return []
        values.append(query[cursor:found_at].strip())
        cursor = found_at + len(non_value)
    values.append(query[cursor:].strip())

    # A literal sitting at the very start or end of the query (or two
    # literals back to back) leaves a placeholder with no value.
    if not all(values):
        return []
    return values
# Demo of format matching.  Single-argument print(...) produces the same
# output on Python 2 and Python 3.
print(find_matching_format('Boston vs. Seattle'))
print(find_matching_format('BLS agencies in New York'))
# This query contains neither 'vs.' nor 'agencies in', so with only those two
# formats registered the call is expected to raise LookupError.
print(find_matching_format('should raise an error'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment