@stewartpark
Created June 20, 2018 19:23
Quick and dirty Ruby lexer/parser/rewriter to make the Rails 5 migration easier
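Usage sketch (the gist does not name the script; assuming it is saved as fix_rails5.rb): run it over the files you want converted, e.g. `ruby fix_rails5.rb spec/controllers/*_spec.rb`. Each listed .rb file is lexed, rewritten by the fixers below, and written back in place.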
#!/usr/bin/env ruby
##
# Quick and dirty Ruby lexer
class RubyLexer
  TOKENS = {
    identifier: /\A[@a-zA-Z_][a-zA-Z_0-9]*/m,
    symbol: /\A:[a-zA-Z0-9_]+/m,
    string: /\A"([^"]|\\.)*"|\A'([^']|\\.)*'/,
    shellout_literal: /\A`([^`\\]|\\.)*`/m,
    ruby_literal: /\A\%[a-zA-Z]?(\(\{[\s\S]*?\}\)|\[[\s\S]*?\]|\{[\s\S]*?\}|\([\s\S]*?\))/m,
    number: /\A[+\-]?[0-9]+(\.[0-9]+)?/m,
    kv_sep_old: /\A=>/m,
    ops: /\A[+\-\/*.=!~<>?&|][|&<>=.]?/m,
    assign_ops: /\A[+\-\/*]?[=]/m,
    namespace_sep: /\A::/m,
    lambda_args: /\A\|.*?\|/m,
    comma: /\A,/m,
    kv_sep: /\A:/m,
    lparen: /\A\(/m,
    rparen: /\A\)/m,
    lcurl: /\A\{/m,
    rcurl: /\A\}/m,
    lsquare: /\A\[/m,
    rsquare: /\A\]/m,
    concat: /\A\\/m,
    ignore: /\A[ \n\t\r\n;]+|\A#(.+?)[\n]/m,
  }.freeze

  def initialize(source)
    @source = source
  end

  # Consume the source string, emitting flat [kind, text] pairs.
  # Whitespace and comments are kept as :ignore tokens so the source
  # can be reassembled verbatim by to_source.
  def lex
    tokens = []
    cur = @source
    until cur.empty?
      TOKENS.each do |k, v|
        t = v.match(cur).to_a
        next if t.empty?
        cur = cur[t.first.size..-1]
        tokens << [k, t.first]
        break
      end
    end
    tokens
  rescue Interrupt
    # If no rule matches, the loop above spins forever; Ctrl-C dumps
    # whatever was lexed so far and bails out.
    p tokens
    exit 1
  end

  def self.to_source(tokens)
    tokens.map do |v|
      v[1]
    end.join('')
  end
end
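# Illustration (not from the original gist): lexing a small snippet such as
#   RubyLexer.new("get :update, id: 1").lex
# yields flat [kind, text] pairs, roughly
#   [[:identifier, "get"], [:ignore, " "], [:symbol, ":update"], [:comma, ","],
#    [:ignore, " "], [:identifier, "id"], [:kv_sep, ":"], [:ignore, " "], [:number, "1"]]
# so RubyLexer.to_source(tokens) reproduces the input verbatim.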
##
# RubyInterestParser only parses points of interest and rewrites them,
# i.e. the constructs we want to fix.
class RubyInterestParser
  INTEREST_RULES = {
    rule_rspec: [
      [:identifier, /get|post|put|delete/],
      [/symbol|string/, //],
      [:comma, //]
    ],
    rule_rspec_2: [
      [:identifier, /get|post|put|delete/],
      [:lparen, //],
      [/symbol|string/, //],
      [:comma, //]
    ],
    rule_wrong_select_usage: [
      [:identifier, 'select'],
      [:lcurl, //],
      [:number, '1'],
      [:rcurl, //]
    ],
    rule_wrong_kv_usage: [
      [:string, //],
      [:kv_sep, //]
    ]
    # Disabled
    # rule_old_kv_usage_1: [
    #   [:symbol, //],
    #   [:kv_sep_old, //]
    # ],
    # rule_old_kv_usage_2: [
    #   [:string, //],
    #   [:kv_sep_old, //]
    # ]
  }.freeze

  def initialize(tokens, rewriters)
    @tokens = tokens
    @rewriters = rewriters
  end

  # A rule part matches a token part either by regexp or by plain equality.
  def match_part?(term_part, token_part)
    if term_part.is_a?(Regexp) && term_part =~ token_part.to_s
      true
    else
      term_part == token_part
    end
  end

  def match?(rule, tokens)
    tokens = tokens.reject { |x| x.first == :ignore }
    rule.each_with_index do |r, i|
      t = tokens[i]
      # The rule may be longer than the remaining token stream.
      return false if t.nil?
      return false unless match_part?(r[0], t[0]) && match_part?(r[1], t[1])
    end
    true
  end

  def parse_and_rewrite
    @rewriters.each do |rw|
      c = 0
      cur = @tokens
      printf ""
      until cur.empty?
        printf "\r#{rw.class.name}: #{(100 - (cur.size.to_f / @tokens.size * 100)).to_i}%%" if cur.size % 1000 == 0
        unless cur.first.first == :ignore
          INTEREST_RULES.each do |k, v|
            next unless match?(v, cur)
            c += 1 if rw.rewrite!(k, cur.reject { |x| x.first == :ignore })
          end
        end
        cur = cur[1..-1]
      end
      puts "\r#{rw.class.name}: #{c} rewritten."
    end
    RubyLexer.to_source(@tokens)
  end
end
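# Illustration (not from the original gist): given the tokens for
# `get :update, id: 1`, match? succeeds for :rule_rspec at the position of
# `get`: [:identifier, "get"] matches [:identifier, /get|post|put|delete/],
# [:symbol, ":update"] matches [/symbol|string/, //], and [:comma, ","]
# matches [:comma, //]; parse_and_rewrite then hands the ignore-stripped
# token stream to each rewriter's rewrite! method.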
##
# RubyFixer rewrites some general syntax issues.
class RubyFixer
  def rewrite!(name, tokens)
    case name
    when :rule_wrong_select_usage
      ##
      # Fix #1
      # We sometimes convert ranges with `select { 1 }`, which can just be `to_a`.
      # e.g. (old) (1..3).select { 1 }
      #      (new) (1..3).to_a
      tokens[0][1] = 'to_a'
      tokens[1][0] = :ignore
      tokens[1][1] = ''
      tokens[2][0] = :ignore
      tokens[2][1] = ''
      tokens[3][0] = :ignore
      tokens[3][1] = ''
      true
    when :rule_wrong_kv_usage
      ##
      # Fix #2
      # Just assume this is an identifier.
      tokens[0][0] = :identifier
      true
    when :rule_old_kv_usage_1
      ##
      # Fix #3
      # Change the old-style hash syntax to the new one.
      tokens[0][0] = :identifier
      tokens[0][1] = tokens[0][1][1..-1]
      tokens[1][0] = :kv_sep
      tokens[1][1] = ':'
      true
    when :rule_old_kv_usage_2
      ##
      # Fix #4
      # Change the old-style hash syntax to the new one.
      unless ['-', '+', '*', '/', '.'].any? { |x| tokens[0][1].include?(x) }
        tokens[0][0] = :identifier
        tokens[0][1] = tokens[0][1][1..-2]
      end
      tokens[1][0] = :kv_sep
      tokens[1][1] = ':'
      true
    else
      false
    end
  end
end
##
# RspecFixer rewrites the RSpec request syntax differences between Rails 4 and Rails 5.
class RspecFixer
  # Skip over one value expression (literal, array, hash, parenthesized
  # expression, call, index, or operator chain) starting at index i and
  # return the index of the token that follows it. Expects a token list
  # with :ignore tokens already removed.
  def skip_value(i, tokens)
    case tokens[i]&.first
    when :lparen
      i += 1
      i = skip_value(i, tokens)
      if tokens[i]&.first == :rparen
        i += 1
      else
        fail "rparen expected"
      end
    when :lsquare
      i += 1
      until tokens[i]&.first == :rsquare
        i = skip_value(i, tokens)
        if tokens[i]&.first == :comma
          i += 1
        end
      end
      i += 1
    when :lcurl
      i += 1
      until tokens[i]&.first == :rcurl
        i = skip_value(i, tokens)
        if [:kv_sep, :kv_sep_old].include?(tokens[i]&.first)
          i += 1
        else
          fail "kv_sep expected"
        end
        i = skip_value(i, tokens)
        if tokens[i]&.first == :comma
          i += 1
        end
      end
      i += 1
    else
      i += 1
    end
    # Function call
    if tokens[i]&.first == :lparen
      i += 1
      until tokens[i]&.first == :rparen
        i = skip_value(i, tokens)
        if tokens[i]&.first == :comma
          i += 1
        end
      end
      i += 1
    end
    # Index
    if tokens[i]&.first == :lsquare
      i += 1
      i = skip_value(i, tokens)
      if tokens[i]&.first == :rsquare
        i += 1
      else
        fail "rsquare expected"
      end
    end
    # Binary operators
    if tokens[i]&.first == :ops || tokens[i]&.first == :namespace_sep
      i += 1
      i = skip_value(i, tokens)
    end
    i
  end
  def rewrite!(name, tokens)
    if name == :rule_rspec_2
      # Process the data as if there were no lparen.
      tokens.delete_at(1)
      name = :rule_rspec
    end
    return false unless name == :rule_rspec
    has_fixed = false

    ##
    # Fix #1
    # If the endpoint is written as a string, use a symbol instead.
    # e.g. (old) get "update", something
    #      (new) get :update, something
    if tokens[1].first == :string
      has_fixed = true
      tokens[1][0] = :symbol
      tokens[1][1] = ":#{tokens[1][1][1..-2]}"
    end

    ##
    # Fix #2
    # If the request does not have `params:`, add it.
    # This is not easy with sed because we need to understand where the line
    # ends.
    # e.g. (old) get :update, something
    #      (new) get :update, params: something
    #      (old) get :update, a: 'test',
    #                         c: { a: 1 }
    #      (new) get :update, params: { a: 'test',
    #                                   c: { a: 1 } }
    unless tokens[2].first == :comma &&
           tokens[3] == [:identifier, 'params'] &&
           tokens[4].first == :kv_sep
      has_fixed = true
      tokens[2][1] += ' params:'
      # Difficult keyword-argument case.
      # If it is already in hash form, we wouldn't have a problem.
      # But there are many instances where it's a keyword argument, which is
      # actually just a hash at the end.
      if (tokens[3]&.first == :symbol && tokens[4]&.first == :kv_sep_old) ||
         (tokens[3]&.first == :identifier && tokens[4]&.first == :kv_sep)
        tokens[2][1] += ' {'
        i = 3
        loop do
          i = skip_value(i, tokens)
          if [:kv_sep, :kv_sep_old].include?(tokens[i]&.first)
            i += 1
          else
            fail "kv_sep expected"
          end
          i = skip_value(i, tokens)
          if tokens[i]&.first == :comma
            i += 1
          else
            break
          end
        end
        tokens[i - 1][1] += ' }'
      end
    end
    has_fixed
  end
end
ARGV.each do |f|
  begin
    next unless f.end_with?('.rb')
    puts "Fixing #{f}..."
    # The order of rewriters will be honored.
    rewriters = [
      RubyFixer.new,
      RspecFixer.new
    ]
    tokens = RubyLexer.new(File.read(f)).lex
    new_source = RubyInterestParser.new(tokens, rewriters).parse_and_rewrite
    File.write(f, new_source)
  rescue Errno::ENOENT => e
    puts "!!! Cannot open file: #{f}"
  end
end
puts "Done! #{ARGV.length} file(s) fixed"