Skip to content

Instantly share code, notes, and snippets.

@phaibin
Created March 2, 2013 00:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save phaibin/5069006 to your computer and use it in GitHub Desktop.
Save phaibin/5069006 to your computer and use it in GitHub Desktop.
check xml tag error
# encoding: utf-8
# Extracts the tag name from a piece of markup such as "<name attr='x'>" or
# "</name>".
#
# The end-tag form is tried first, so any text containing "</...>" is reported
# as an end tag. A begin-tag name ends at the first whitespace character or
# ">", so attributes are never included, even when they are separated from
# the name by a newline or tab rather than a space (which XML allows).
#
# str - the markup text to inspect.
#
# Returns a Hash {is_begin: true/false, tag: String}, or nil when str
# contains nothing that looks like a tag.
def find_tag(str)
  # XML permits whitespace between the name and ">" in an end tag; leave it
  # out of the capture so "</a >" still pairs with "<a>".
  if str =~ /<\/(.*?)\s*>/mu
    return {is_begin: false, tag: $~.captures.first}
  end
  # \s instead of a literal space: the name may be followed by a newline/tab.
  if str =~ /<(.*?)(\s|>)/mu
    return {is_begin: true, tag: $~.captures.first}
  end
  nil
end
# Checks that begin and end tags in the XLIFF file are balanced.
#
# Every "<...>" chunk is scanned in document order. Begin tags are pushed on a
# stack; each end tag must match the most recently pushed begin tag. The first
# problem found is reported and scanning stops. 'success' is printed only when
# no problem was found.
str = File.read(File.expand_path('~/Google Drive/temp//LibraryMessages_en.xlf.xml'), :encoding => 'utf-8')
pattern = /(<.*?>)/mu
# Collect MatchData objects (not just strings) so offsets into str are kept.
matches = str.to_enum(:scan, pattern).map { Regexp.last_match }
begin_tags = []
error_found = false

matches.each do |match|
  text = match.to_s
  # Skip chunks starting "<?" or "<!" and self-closing tags ("<x/>"): none of
  # them open or close an element.
  next if text =~ /<(\?|!).*?>/mu || text =~ /<.*?\/>/mu

  tag = find_tag(text)
  next unless tag

  if tag[:is_begin]
    begin_tags << {match: match, tag: tag[:tag]}
    next
  end

  # End tag: it must close the innermost open begin tag. pop returns nil when
  # nothing is open, which is itself an error rather than a crash.
  begin_tag = begin_tags.pop
  next if begin_tag && begin_tag[:tag] == tag[:tag]

  error_found = true
  puts '============================================'
  if begin_tag
    # Line number is 1-based: newlines before the begin tag, plus one.
    puts 'error find at line: #' + (str[0..begin_tag[:match].begin(0)].count("\n")+1).to_s
    puts 'begin tag is: ' + begin_tag[:tag]
    puts 'begin match is: ' + str[begin_tag[:match].begin(0), 100]
    puts 'current tag is: ' + tag[:tag]
    puts 'current match is: ' + str[match.begin(0), 100]
  else
    puts 'error find at line: #' + (str[0..match.begin(0)].count("\n")+1).to_s
    puts 'end tag without begin tag: ' + tag[:tag]
    puts 'current match is: ' + str[match.begin(0), 100]
  end
  puts '============================================'
  break
end

# Anything left on the stack was opened but never closed; report the
# innermost such tag.
if !error_found && !begin_tags.empty?
  unclosed = begin_tags.last
  error_found = true
  puts '============================================'
  puts 'error find at line: #' + (str[0..unclosed[:match].begin(0)].count("\n")+1).to_s
  puts 'begin tag is never closed: ' + unclosed[:tag]
  puts 'begin match is: ' + str[unclosed[:match].begin(0), 100]
  puts '============================================'
end

puts 'success' unless error_found
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment