tompng/Gemfile

## Gemfile
source 'https://rubygems.org'
gem 'yarp'
gem 'parser'

## Gemfile.lock
GEM
  remote: https://rubygems.org/
  specs:
    ast (2.4.2)
    parser (3.2.2.3)
      ast (~> 2.4.1)
      racc
    racc (1.7.1)
    yarp (0.6.0)

PLATFORMS
  arm64-darwin-22

DEPENDENCIES
  parser
  yarp

BUNDLED WITH
   2.4.10

## indent_calculation.rb
# Shared indent calculator driven by a stack of unclosed "open" tokens.
#
# Subclasses provide the lexer-specific pieces this class calls:
# +tokenize(source)+, +open_token?(token)+, +close_token?(token)+ and
# +heredoc_open_token?(token)+.
class IndentBase
  # Tokenizes +source+ (via the subclass) and returns the indent level
  # implied by the tokens that are still open at the end of the input.
  def calculate(source)
    calculate_from_open_tokens(collect_open_tokens(tokenize(source)))
  end

  # Walks +tokens+ in order, pushing every open token and popping the most
  # recent one whenever a close token is seen. Returns the tokens left on
  # the stack. A close token on an empty stack is a no-op.
  def collect_open_tokens(tokens)
    tokens.each_with_object([]) do |tok, stack|
      if open_token?(tok)
        stack.push(tok)
      elsif close_token?(tok)
        stack.pop
      end
    end
  end

  # Folds the open-token stack into an indent level: each ordinary open
  # token adds one level, while a heredoc opener resets the level to zero.
  def calculate_from_open_tokens(open_tokens)
    open_tokens.reduce(0) do |depth, tok|
      heredoc_open_token?(tok) ? 0 : depth + 1
    end
  end
end

require 'yarp'
# Indent calculator that feeds YARP lexer tokens into IndentBase.
class YarpIndent < IndentBase
  # Token types that push a new nesting level.
  OPEN_TOKEN_TYPES = %i[
    KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DEF
    LAMBDA_BEGIN KEYWORD_DO KEYWORD_CASE KEYWORD_FOR
    KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL
    PARENTHESIS_LEFT BRACE_LEFT BRACKET_LEFT BRACKET_LEFT_ARRAY
  ]
  # Token types that pop the most recent nesting level.
  CLOSE_TOKEN_TYPES = %i[KEYWORD_END PARENTHESIS_RIGHT BRACE_RIGHT BRACKET_RIGHT]

  # Lexes +source+ with YARP and keeps the first element of every lexer
  # entry, which is used as the token by the predicates below.
  def tokenize(source)
    lexed = YARP.lex(source).value
    lexed.map { |entry| entry.first }
  end

  # True for the listed opener types and for heredoc starts.
  def open_token?(token)
    OPEN_TOKEN_TYPES.include?(token.type) || heredoc_open_token?(token)
  end

  # True for the listed closer types and for heredoc ends.
  def close_token?(token)
    CLOSE_TOKEN_TYPES.include?(token.type) || heredoc_close_token?(token)
  end

  # True when +token+ starts a heredoc.
  def heredoc_open_token?(token)
    token.type == :HEREDOC_START
  end

  # True when +token+ ends a heredoc.
  def heredoc_close_token?(token)
    token.type == :HEREDOC_END
  end
end

# Indent calculator that relies on the parse errors YARP reports for
# incomplete source instead of matching tokens by hand.
class YarpErrorTolerantIndent
  # Parses +source+ and walks the reported errors from last to first:
  # every "missing closer" style message adds one level, and an
  # unterminated-heredoc message resets the level to zero.
  def calculate(source)
    reported = YARP.parse(source).errors
    reported.reverse_each.reduce(0) do |depth, error|
      text = error.message
      # FIXME: depending on error message is not good
      if text.match?(/Expected `end`|Expected '\)'|Expected a closing bracket|Expected .+ to end with 'end'|ADD MORE PATTERNS HERE/)
        depth + 1
      elsif text.match?(/Expected a closing delimiter for heredoc./)
        0
      else
        depth
      end
    end
  end
end

require 'parser/current'
# Indent calculator that feeds tokens from the `parser` gem into IndentBase.
class ParserIndent < IndentBase
  # Token types that push a new nesting level.
  # NOTE: the parser gem names its keyword tokens in upper case (kCASE);
  # the previous `kCase` entry never matched, so `case` was not counted as
  # an opener while its matching `end` still popped a level.
  OPEN_TOKEN_TYPES = %i[
    kCLASS kMODULE kDEF
    kDO_LAMBDA kDO kCASE kFOR
    kIF kUNLESS kWHILE kUNTIL
    tLPAREN tLPAREN2 tLCURLY tLBRACE tLBRACK tLBRACK2
  ].freeze
  # Token types that pop the most recent nesting level.
  CLOSE_TOKEN_TYPES = %i[kEND tRPAREN tRCURLY tRBRACK].freeze

  # Tokenizes +source+ with the current-Ruby parser. The last element of
  # the value returned by +tokenize+ is used as the token list.
  def tokenize(source)
    Parser::CurrentRuby.new.tokenize(Parser::Source::Buffer.new('', source:)).last
  end

  # Each token is indexed with [0] to obtain its type symbol.
  def open_token?(token) = OPEN_TOKEN_TYPES.include?(token[0])
  def close_token?(token) = CLOSE_TOKEN_TYPES.include?(token[0])
  # Parser does not support tokenizing unterminated heredoc
  def heredoc_open_token?(token) = false
  def heredoc_close_token?(token) = false
end

# Sample 1: a method whose body ends inside a still-open `tap do` block.
unclosed_block_sample = <<~RUBY
  def f
    if true
      tap do
        1
      end if true
      tap do
RUBY

# Print the indent level each calculator reports for sample 1.
[YarpErrorTolerantIndent, YarpIndent, ParserIndent].each do |calculator|
  puts calculator.new.calculate(unclosed_block_sample)
end

# Sample 2: an unterminated heredoc containing an unclosed interpolation.
# The raw heredoc keeps `#{` literal in the sample text.
unclosed_heredoc_sample = <<~'RUBY'
  if true
    if true
      <<A
  #{tap do
RUBY

# ParserIndent is not run on sample 2.
[YarpErrorTolerantIndent, YarpIndent].each do |calculator|
  puts calculator.new.calculate(unclosed_heredoc_sample)
end
	GEM
	remote: https://rubygems.org/
	specs:
	ast (2.4.2)
	parser (3.2.2.3)
	ast (~> 2.4.1)
	racc
	racc (1.7.1)
	yarp (0.6.0)

	PLATFORMS
	arm64-darwin-22

	DEPENDENCIES
	parser
	yarp

	BUNDLED WITH
	2.4.10
	# Shared indent calculator driven by a stack of unclosed "open" tokens.
	#
	# Subclasses provide the lexer-specific pieces this class calls:
	# +tokenize(source)+, +open_token?(token)+, +close_token?(token)+ and
	# +heredoc_open_token?(token)+.
	class IndentBase
	  # Pushes every open token and pops the most recent one on each close
	  # token; returns the tokens that remain open after the last token.
	  def collect_open_tokens(tokens)
	    open_tokens = []
	    tokens.each do |token|
	      if open_token? token
	        open_tokens << token
	      elsif close_token? token
	        open_tokens.pop
	      end
	    end
	    open_tokens
	  end

	  # Tokenizes +source+ and returns the indent level implied by the
	  # tokens still open at the end of the input.
	  def calculate(source)
	    tokens = tokenize(source)
	    open_tokens = collect_open_tokens(tokens)
	    calculate_from_open_tokens(open_tokens)
	  end

	  # Each ordinary open token adds one level; a heredoc opener resets
	  # the level to zero.
	  def calculate_from_open_tokens(open_tokens)
	    indent = 0
	    open_tokens.each do |token|
	      if heredoc_open_token?(token)
	        indent = 0
	      else
	        indent += 1
	      end
	    end
	    indent
	  end
	end

	require 'yarp'
	# Indent calculator that feeds YARP lexer tokens into IndentBase.
	class YarpIndent < IndentBase
	  # Lexes +source+ with YARP and keeps the first element of every lexer
	  # entry, which is used as the token by the predicates below.
	  def tokenize(source)
	    YARP.lex(source).value.map(&:first)
	  end

	  # Token types that push a new nesting level.
	  OPEN_TOKEN_TYPES = %i[
	    KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DEF
	    LAMBDA_BEGIN KEYWORD_DO KEYWORD_CASE KEYWORD_FOR
	    KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL
	    PARENTHESIS_LEFT BRACE_LEFT BRACKET_LEFT BRACKET_LEFT_ARRAY
	  ]
	  # Token types that pop the most recent nesting level.
	  CLOSE_TOKEN_TYPES = %i[KEYWORD_END PARENTHESIS_RIGHT BRACE_RIGHT BRACKET_RIGHT]
	  # Heredoc starts/ends count as open/close tokens in addition to the lists.
	  def open_token?(token) = OPEN_TOKEN_TYPES.include?(token.type) || heredoc_open_token?(token)
	  def close_token?(token) = CLOSE_TOKEN_TYPES.include?(token.type) || heredoc_close_token?(token)
	  def heredoc_open_token?(token) = token.type == :HEREDOC_START
	  def heredoc_close_token?(token) = token.type == :HEREDOC_END
	end

	# Indent calculator that relies on the parse errors YARP reports for
	# incomplete source instead of matching tokens by hand.
	class YarpErrorTolerantIndent
	  # Parses +source+ and walks the reported errors from last to first:
	  # every "missing closer" style message adds one level, and an
	  # unterminated-heredoc message resets the level to zero.
	  def calculate(source)
	    indent = 0
	    YARP.parse(source).errors.reverse_each do |error|
	      # FIXME: depending on error message is not good
	      # The `|` separators must be unescaped: `\|` would match a literal pipe.
	      if error.message.match?(/Expected `end`|Expected '\)'|Expected a closing bracket|Expected .+ to end with 'end'|ADD MORE PATTERNS HERE/)
	        indent += 1
	      elsif error.message.match?(/Expected a closing delimiter for heredoc./)
	        indent = 0
	      end
	    end
	    indent
	  end
	end

	require 'parser/current'
	# Indent calculator that feeds tokens from the `parser` gem into IndentBase.
	class ParserIndent < IndentBase
	  # Token types that push a new nesting level.
	  # NOTE: the parser gem names its keyword tokens in upper case (kCASE);
	  # the previous `kCase` entry never matched, so `case` was not counted as
	  # an opener while its matching `end` still popped a level.
	  OPEN_TOKEN_TYPES = %i[
	    kCLASS kMODULE kDEF
	    kDO_LAMBDA kDO kCASE kFOR
	    kIF kUNLESS kWHILE kUNTIL
	    tLPAREN tLPAREN2 tLCURLY tLBRACE tLBRACK tLBRACK2
	  ].freeze
	  # Token types that pop the most recent nesting level.
	  CLOSE_TOKEN_TYPES = %i[kEND tRPAREN tRCURLY tRBRACK].freeze

	  # Tokenizes +source+ with the current-Ruby parser. The last element of
	  # the value returned by +tokenize+ is used as the token list.
	  def tokenize(source)
	    Parser::CurrentRuby.new.tokenize(Parser::Source::Buffer.new('', source:)).last
	  end

	  # Each token is indexed with [0] to obtain its type symbol.
	  def open_token?(token) = OPEN_TOKEN_TYPES.include?(token[0])
	  def close_token?(token) = CLOSE_TOKEN_TYPES.include?(token[0])
	  # Parser does not support tokenizing unterminated heredoc
	  def heredoc_open_token?(token) = false
	  def heredoc_close_token?(token) = false
	end

	# Sample 1: a method whose body ends inside a still-open `tap do` block.
	# A squiggly heredoc is used so the indented terminator closes it and the
	# common leading whitespace is stripped from the sample.
	code = <<~RUBY
	  def f
	    if true
	      tap do
	        1
	      end if true
	      tap do
	RUBY

	puts YarpErrorTolerantIndent.new.calculate(code)
	puts YarpIndent.new.calculate(code)
	puts ParserIndent.new.calculate(code)

	# Sample 2: an unterminated heredoc containing an unclosed interpolation.
	# The raw heredoc keeps `#{` literal in the sample text.
	code = <<~'RUBY'
	  if true
	    if true
	      <<A
	  #{tap do
	RUBY

	# ParserIndent is not run on sample 2.
	puts YarpErrorTolerantIndent.new.calculate(code)
	puts YarpIndent.new.calculate(code)