Created
July 5, 2022 05:30
-
-
Save armahillo/98177827f2fe09daeb34e79938e2e57e to your computer and use it in GitHub Desktop.
Rails task that identifies text that should be extracted to localization files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I wrote this up one evening while auditing an app for localization. It can almost certainly be optimized, but it works!
namespace :i18n do | |
# Scans a single view template for text that probably still needs to be moved
# into a localization file. This method does most of the heavy lifting.
#
# @param file_with_path [String] full path of the template to read
# @return [Hash{Symbol => Array<String>}, nil] the captured snippets grouped
#   under :erb, :snakecase, :html and :plain (groups with no entries are
#   dropped), or nil when nothing was captured
def scan_file_for_literals(file_with_path)
  # Remove ERB tags that start a line and are not output tags (`<% ... %>` but
  # not `<%= ... %>`): they emit nothing, so there is nothing to localize.
  # The quantifier is lazy on purpose. Under /m a greedy `.*` would run from
  # the first such tag to the LAST `%>` in the file and wipe out the markup
  # in between.
  markup = File.read(file_with_path).gsub(/^<%[^=].*?%>/m, '')

  # Capture whatever sits between an opening tag and a closing tag on one
  # line. Output ERB tags nested in HTML are captured here too.
  snippets = markup.scan(%r{<[^>]+>(.*)</.*>}).flatten

  # Empty or whitespace-only captures (e.g. from `<div></div>`) carry no text.
  snippets.reject! { |snippet| snippet.strip.empty? }

  # Skip output tags that already call `t` / `translate` (also `I18n.t`).
  # The word boundaries keep names that merely contain a "t", such as
  # `@post.title` or `link_to`, from being mistaken for a translation call.
  snippets.reject! { |snippet| snippet.match?(/<%=[^>]*\b(?:t|translate)\b/) }

  # Bail if there's nothing left to process in this file.
  return if snippets.empty?

  # The four groupings. To add one, add a key here and a `when` below.
  grouping = { erb: [], snakecase: [], html: [], plain: [] }

  snippets.each do |snippet|
    key =
      case snippet
      # Strings that are _only_ snake_case, e.g. icon names: <i>some_icon</i>
      when /\A[a-z][0-9a-z_]+\z/ then :snakecase
      # Translated ERB was removed above, so this is untranslated ERB output.
      when /<%=/ then :erb
      # Nested HTML tags or HTML entities.
      when /<\w+/, /&[a-z]+;/ then :html
      # Everything else is almost certainly the text we want to localize.
      else :plain
      end
    grouping[key] << snippet
  end

  # Only report the groupings that were actually found in this file.
  grouping.reject { |_key, found| found.empty? }
end
# Convenience method that shortens an absolute path to one relative to the
# Rails root, e.g.
#   /home/username/application_name/app/views/foos/show.html.erb
#   => app/views/foos/show.html.erb
#
# Only a leading "<Rails.root>/" is removed. Stripping every occurrence of the
# root string would mangle paths when the root is e.g. "/app", because
# "/app/app/views/..." contains it twice.
#
# @param file_with_path [String, Pathname] path to shorten
# @return [String] the path without the root prefix; returned unchanged when
#   it does not start with the root
def abbreviate_filename(file_with_path)
  file_with_path.to_s.delete_prefix("#{Rails.root}/")
end
desc "Find all views that have HTML tags with text nodes"
task :find_literals do |_task, args|
  # Groups to print. Defaults to just :plain. Pass rake arguments to pick
  # others, e.g. `rake "i18n:find_literals[plain,erb]"`.
  requested = args.extras.map(&:to_sym)
  include_variants = requested.empty? ? [:plain] : requested

  # Limit the scanned files to just those in the views. A local variable is
  # used rather than a constant so nothing global is defined and re-invoking
  # the task does not warn about an already initialized constant.
  view_files = Dir.glob(Rails.root.join('app', 'views', '**', '*.erb'))

  view_files.each do |filename|
    # Skip kitchen-sink files (styleguide and the like) whose names contain
    # "test.html". This is a literal substring check, so the "." is not
    # treated as a regex wildcard.
    next if File.basename(filename).include?('test.html')

    # scan_file_for_literals returns the snippets grouped by type, or nil when
    # the file has none; keep only the groups requested above.
    groups = scan_file_for_literals(filename)&.slice(*include_variants)

    # Don't output if there are no results within the desired groups.
    next if groups.nil? || groups.empty?

    # Print the filename followed by the text bits that need to move to i18n
    # YML files. `ap` is from the `awesome_print` gem.
    puts abbreviate_filename(filename)
    ap groups
  end
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment