Created
October 5, 2010 20:49
-
-
Save aarongough/612311 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pulls every double-quoted string literal out of +string+ and leaves a
# placeholder token where each one used to be.
#
# string - the raw s-expression text.
#
# Returns a two-element array: the text with each literal swapped for the
# placeholder, and the literals (quotes included) in order of appearance.
def extract_string_literals(string)
  literal_pattern = /"([^"\\]|\\.)*"/
  placeholder = "___+++STRING_LITERAL+++___"

  # One pass does both jobs: remember the matched literal, then hand the
  # placeholder back to gsub as its replacement.
  found = []
  masked = string.gsub(literal_pattern) do |literal|
    found << literal
    placeholder
  end

  [masked, found]
end
# Splits +string+ into whitespace-separated tokens, treating each
# parenthesis as a token of its own even when it touches its neighbours.
#
# string - s-expression text (string literals are expected to have been
#          replaced by placeholders already, since they may contain spaces).
#
# Returns an array of token strings.
def tokenize_string(string)
  # Pad each parenthesis with spaces so the split below isolates it.
  padded = string.gsub(/[()]/, "(" => " ( ", ")" => " ) ")
  padded.split(" ")
end
# Puts the extracted string literals back in place of their placeholders.
#
# token_array     - tokens, some of which may be the placeholder token.
# string_literals - literals in the order they were extracted; this array
#                   is only read, never modified.
#
# Returns a new array in which the n-th placeholder has been replaced by
# the n-th literal. A placeholder with no literal left becomes nil.
def restore_string_literals(token_array, string_literals)
  # Walk the literals with an index instead of shifting them off, so the
  # caller's array is left intact.
  next_literal = 0

  token_array.map do |token|
    if token == '___+++STRING_LITERAL+++___'
      literal = string_literals[next_literal]
      next_literal += 1
      literal
    else
      # Not a placeholder: pass the token through unchanged.
      token
    end
  end
end
# Helper shared by the token predicates: does +pattern+ match the whole of
# +string+?
#
# string  - the token to test.
# pattern - a Regexp (or regex source string) describing the token.
#
# Returns true only when the pattern can consume the entire string,
# otherwise false.
def is_match?(string, pattern)
  # Anchor the pattern at both ends rather than comparing the length of the
  # first match found: with alternations such as /a|ab/ the first match can
  # be shorter than the string even though a full match exists.
  anchored = /\A(?:#{pattern})\z/
  !string.match(anchored).nil?
end
# Symbol predicate.
#
# A token counts as a symbol when it is one or more characters, none of
# which is a parenthesis, a comma, or a single or double quote.
def is_symbol?(string)
  symbol_pattern = /[^\"\'\,\(\)]+/
  is_match?(string, symbol_pattern)
end
# Integer-literal predicate.
#
# A token counts as an integer when it is one or more decimal digits with
# an optional leading plus or minus sign.
def is_integer_literal?(string)
  integer_pattern = /[\-\+]?[0-9]+/
  is_match?(string, integer_pattern)
end
# String-literal predicate.
#
# A token counts as a string literal when it is wrapped in double quotes
# and any double quote or backslash inside it is escaped with a backslash.
def is_string_literal?(string)
  literal_pattern = /"([^"\\]|\\.)*"/
  is_match?(string, literal_pattern)
end
# Converts raw token strings into Ruby values.
#
# token_array - array of token strings.
#
# Returns a new array where parentheses are kept as "(" / ")" strings,
# integer literals become Integers, symbols become Symbols and string
# literals become Strings.
#
# Raises ArgumentError when a token matches none of the rules. (It is
# still an Exception, so callers rescuing Exception keep working, but it
# no longer slips past a bare `rescue`.)
def convert_tokens(token_array)
  token_array.map do |token|
    if token == "(" || token == ")"
      token
    elsif is_integer_literal?(token)
      # Checked before is_symbol? because a run of digits also satisfies
      # the symbol pattern.
      token.to_i
    elsif is_symbol?(token)
      token.to_sym
    elsif is_string_literal?(token)
      # SECURITY(review): eval executes the token as Ruby source. A literal
      # such as "#{...}" passes the string-literal pattern and would run
      # arbitrary code. Do not feed untrusted input through this path
      # without replacing eval by a dedicated unescaping routine.
      eval(token)
    else
      # No rule left to try.
      raise ArgumentError, "Unrecognized token: #{token}"
    end
  end
end
# Turns a flat token list into nested arrays, one array per parenthesised
# group, starting at index +offset+.
#
# token_array - flat array of tokens; "(" opens a group, ")" closes one.
# offset      - index at which to start reading (default 0).
#
# Returns [index, nested]: +nested+ holds the tokens and sub-arrays
# collected at this level, and +index+ is where reading stopped: the
# position of the ")" that closed this level, or token_array.length when
# the tokens ran out first.
def re_structure(token_array, offset = 0)
  nested = []
  cursor = offset

  until cursor >= token_array.length
    token = token_array[cursor]

    # A closing parenthesis ends this level; leave the cursor on it so the
    # caller's own increment steps past it.
    break if token == ")"

    if token == "("
      # Recurse for the group body; the call reports where it stopped.
      cursor, child = re_structure(token_array, cursor + 1)
      nested << child
    else
      nested << token
    end

    cursor += 1
  end

  [cursor, nested]
end
# Parses s-expression text into nested Ruby arrays.
#
# string - the s-expression source text.
#
# Pipeline: mask string literals, tokenize, restore the literals, convert
# each token to a Ruby value, then nest the tokens by parenthesis.
#
# Returns the nested array produced by re_structure.
def parse(string)
  masked, literals = extract_string_literals(string)
  raw_tokens = restore_string_literals(tokenize_string(masked), literals)

  # re_structure also returns the final offset, which is not needed here.
  _, s_expression = re_structure(convert_tokens(raw_tokens))
  s_expression
end
# Demo: parse a sample s-expression and print the resulting structure.
sample_expression = '(this (is a number 1( example "s-expression")))'
puts parse(sample_expression).inspect
That actually sounds interesting! I'll have to have a look next week!
I can't promise anything though as I am totally swamped at the moment. If you have a look at Sexpistol and wish to contribute a patch to make it compatible with sexp_path I would be happy to accept it!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just a thought, if you take a look at the sexp_processor gem and the sexp_path, you can output Sexp class instances, and then do all kinds of pattern matching / transformations on the Sexp instances.
Basically I'm just looking for some reason someone would ever use sexp_path ;)