# armahillo/i18n.rake

## i18n.rake
# I wrote this up one evening while auditing an app for localization. It can almost certainly be optimized, but it works!

namespace :i18n do
  # Scans one ERB view for text that still needs to be moved into locale files.
  #
  # The file is read, non-emitting ERB tags (<% ... %>, <%# ... %>) are removed, and the
  # contents of every single-line <tag>...</tag> pair are collected. Snippets that are blank,
  # or whose ERB output tag already calls t/translate, are dropped; the rest are grouped.
  #
  # @param file_with_path [String] full path of the view file to scan
  # @return [Hash{Symbol => Array<String>}, nil] snippets keyed by :erb, :snakecase, :html and
  #   :plain (groups with no snippets are omitted), or nil when nothing is left to report
  def scan_file_for_literals(file_with_path)
    # Drop ruby code that isn't emitted, since it won't need to be localized. The lazy `.*?`
    # stops at the first closing `%>`; a greedy `.*` under /m would swallow everything up to
    # the LAST `%>` in the file. No `^` anchor, so indented and inline tags are removed too.
    markup = File.read(file_with_path).gsub(/<%[^=].*?%>/m, '')

    # Pull the contents of anything that looks like <tag>contents</tag>. `.` does not match
    # newlines here, so only pairs that open and close on one line are captured.
    results = markup.scan(/<[^>]+>(.*)<\/.*>/).flatten

    # Nothing to localize in blank captures, nor in ERB output that already goes through
    # t(...) / translate(...). The call has to be a whole word followed by an argument, so
    # identifiers that merely contain a "t" (title, first_name, submit) are kept.
    already_translated = /<%=[^>]*\b(?:t|translate)(?:\s*[('"]|\s+:)/
    results.reject! { |snippet| snippet.strip.empty? || snippet.match?(already_translated) }

    # Bail if there's nothing left to process on this file
    return if results.empty?

    # You can add additional grouping keys here and in the classification below
    grouping = { erb: [], snakecase: [], html: [], plain: [] }

    results.each do |result|
      key =
        if result.match?(/^[a-z][0-9a-z_]+$/)
          # Strings that are _only_ snakecase, e.g. font icon names: <i>some_icon</i>
          :snakecase
        elsif result.match?(/<%=/)
          # Localized ERB was filtered out above, so any remaining ERB output is reported
          :erb
        elsif result.match?(/<\w+/) || result.match?(/&[a-z]+;/)
          # Nested tags or HTML entities
          :html
        else
          # Everything else is almost certainly the text we want to localize
          :plain
        end
      grouping[key] << result
    end

    # Only report groupings that were actually found in this file
    grouping.reject { |_key, snippets| snippets.empty? }
  end

  # Convenience method that makes a path relative to the Rails root:
  # /home/username/application_name/app/views/foos/show.html.erb => app/views/foos/show.html.erb
  #
  # Only a leading "<Rails.root>/" is removed. Replacing every occurrence of the root string
  # would also eat later path segments when the root is e.g. "/app". Paths that are not under
  # the root are returned unchanged.
  #
  # @param file_with_path [String, Pathname] path of a file
  # @return [String] the path without the Rails root prefix
  def abbreviate_filename(file_with_path)
    file_with_path.to_s.delete_prefix("#{Rails.root}/")
  end

  # Prints, for every ERB view under app/views, the hard-coded snippets found by
  # scan_file_for_literals, limited to the groupings listed in include_variants.
  desc "Find all views that have HTML tags with text nodes"
  task :find_literals do |_t, _args|
    # Edit this list to report other groupings (:erb, :snakecase, :html, :plain)
    include_variants = [:plain]

    # Limit the scanned files to just those in the views. Sorted so the report order is stable.
    view_files = Dir.glob(Rails.root.join('app', 'views', '**', '*.erb')).sort

    view_files.each do |filename|
      # I have a few kitchen sink files for the styleguide and whatnot. They end with "_test.html".
      # Plain substring check: passing the string to match? would turn "." into a regex wildcard.
      next if File.basename(filename).include?('test.html')

      # This pulls a hash that has the different snippets grouped by type (nil if none were found)
      literals = scan_file_for_literals(filename)
      next if literals.nil?

      # This limits the final output to just the key groups that are specified above
      literals = literals.slice(*include_variants)

      # Don't output if we have no results within the desired group key
      next if literals.empty?

      # Feel free to change this to whatever. I like to see the filename followed by
      # the text bits that need to be moved to i18n YML files. `ap` is from the `awesome_print` gem
      puts abbreviate_filename(filename)
      ap literals
    end
  end

end
	# I wrote this up one evening while auditing an app for localization. It can almost certainly be optimized, but it works!

	namespace :i18n do
	# Scans one ERB view for text that still needs to be moved into locale files.
	#
	# The file is read, non-emitting ERB tags (<% ... %>, <%# ... %>) are removed, and the
	# contents of every single-line <tag>...</tag> pair are collected. Snippets that are blank,
	# or whose ERB output tag already calls t/translate, are dropped; the rest are grouped.
	#
	# @param file_with_path [String] full path of the view file to scan
	# @return [Hash{Symbol => Array<String>}, nil] snippets keyed by :erb, :snakecase, :html and
	#   :plain (groups with no snippets are omitted), or nil when nothing is left to report
	def scan_file_for_literals(file_with_path)
	  # Drop ruby code that isn't emitted, since it won't need to be localized. The lazy `.*?`
	  # stops at the first closing `%>`; a greedy `.*` under /m would swallow everything up to
	  # the LAST `%>` in the file. No `^` anchor, so indented and inline tags are removed too.
	  markup = File.read(file_with_path).gsub(/<%[^=].*?%>/m, '')

	  # Pull the contents of anything that looks like <tag>contents</tag>. `.` does not match
	  # newlines here, so only pairs that open and close on one line are captured.
	  results = markup.scan(/<[^>]+>(.*)<\/.*>/).flatten

	  # Nothing to localize in blank captures, nor in ERB output that already goes through
	  # t(...) / translate(...). The call has to be a whole word followed by an argument, so
	  # identifiers that merely contain a "t" (title, first_name, submit) are kept.
	  already_translated = /<%=[^>]*\b(?:t|translate)(?:\s*[('"]|\s+:)/
	  results.reject! { |snippet| snippet.strip.empty? || snippet.match?(already_translated) }

	  # Bail if there's nothing left to process on this file
	  return if results.empty?

	  # You can add additional grouping keys here and in the classification below
	  grouping = { erb: [], snakecase: [], html: [], plain: [] }

	  results.each do |result|
	    key =
	      if result.match?(/^[a-z][0-9a-z_]+$/)
	        # Strings that are _only_ snakecase, e.g. font icon names: <i>some_icon</i>
	        :snakecase
	      elsif result.match?(/<%=/)
	        # Localized ERB was filtered out above, so any remaining ERB output is reported
	        :erb
	      elsif result.match?(/<\w+/) || result.match?(/&[a-z]+;/)
	        # Nested tags or HTML entities
	        :html
	      else
	        # Everything else is almost certainly the text we want to localize
	        :plain
	      end
	    grouping[key] << result
	  end

	  # Only report groupings that were actually found in this file
	  grouping.reject { |_key, snippets| snippets.empty? }
	end

	# Convenience method that makes a path relative to the Rails root:
	# /home/username/application_name/app/views/foos/show.html.erb => app/views/foos/show.html.erb
	#
	# Only a leading "<Rails.root>/" is removed. Replacing every occurrence of the root string
	# would also eat later path segments when the root is e.g. "/app". Paths that are not under
	# the root are returned unchanged.
	#
	# @param file_with_path [String, Pathname] path of a file
	# @return [String] the path without the Rails root prefix
	def abbreviate_filename(file_with_path)
	  file_with_path.to_s.delete_prefix("#{Rails.root}/")
	end

	# Prints, for every ERB view under app/views, the hard-coded snippets found by
	# scan_file_for_literals, limited to the groupings listed in include_variants.
	desc "Find all views that have HTML tags with text nodes"
	task :find_literals do |_t, _args|
	  # Edit this list to report other groupings (:erb, :snakecase, :html, :plain)
	  include_variants = [:plain]

	  # Limit the scanned files to just those in the views. Sorted so the report order is stable.
	  view_files = Dir.glob(Rails.root.join('app', 'views', '**', '*.erb')).sort

	  view_files.each do |filename|
	    # I have a few kitchen sink files for the styleguide and whatnot. They end with "_test.html".
	    # Plain substring check: passing the string to match? would turn "." into a regex wildcard.
	    next if File.basename(filename).include?('test.html')

	    # This pulls a hash that has the different snippets grouped by type (nil if none were found)
	    literals = scan_file_for_literals(filename)
	    next if literals.nil?

	    # This limits the final output to just the key groups that are specified above
	    literals = literals.slice(*include_variants)

	    # Don't output if we have no results within the desired group key
	    next if literals.empty?

	    # Feel free to change this to whatever. I like to see the filename followed by
	    # the text bits that need to be moved to i18n YML files. `ap` is from the `awesome_print` gem
	    puts abbreviate_filename(filename)
	    ap literals
	  end
	end

	end