Skip to content

Instantly share code, notes, and snippets.

@mlni
Created November 16, 2017 12:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mlni/2b0e45250a86b6b6c3abcb195b982797 to your computer and use it in GitHub Desktop.
Save mlni/2b0e45250a86b6b6c3abcb195b982797 to your computer and use it in GitHub Desktop.
Repro case for XML parsing issue with Nokogiri 1.8.1
require 'nokogiri'
# Builds the synthetic XML document used by the repro: a <Root> element
# wrapping a single <ModelFile>, which in turn holds +number_of_occurrences+
# <ProductOccurrence> elements whose Id attributes run from 1 upwards.
#
# @param number_of_occurrences [Integer] how many <ProductOccurrence> elements to emit
# @return [String] the complete XML text
def generate_xml(number_of_occurrences)
  # NOTE(review): the ModelFile literal is single-quoted, so its trailing \n is
  # emitted as a backslash followed by "n", not as a newline. Kept as-is so the
  # generated bytes stay exactly what the repro has always produced.
  opening = "<Root>\n" + ' <ModelFile Name="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ModellerType="aa" Unit="1" UnitFromCAD="1" VM="10" Vm="0">\n'

  # The squiggly heredoc strips the source indentation, so every emitted line
  # starts at column 0.
  occurrences = number_of_occurrences.times.map do |index|
    <<~XML
      <ProductOccurrence Id="#{index + 1}" Name="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" PersistentId="1111111111" Layer="65535" Style="65535" Behaviour="1" FilePath="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" OriginalFilePath="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" Children="1111111111111111111111111111111111111111111111111111111" ModellerType="22" ProductLoadStatus="4" ProductFlag="4" Unit="1" DensityVolumeUnit="1" DensityMassUnit="1" UnitFromCAD="1">
      <Transformation RelativeTransfo="1111111111111111111111111111111" />
      <A3DMiscMaterialPropertiesData m_dDensity="-1" />
      </ProductOccurrence>
    XML
  end

  closing = " </ModelFile>\n" + "</Root>\n"

  opening + occurrences.join + closing
end
# Parses +xml+ with Nokogiri and reads the "Id" attribute of the last
# <ProductOccurrence> child found under the first <ModelFile> child of the
# document root.
#
# There are no nil checks: if the root has no ModelFile child, or that
# ModelFile has no ProductOccurrence child, the lookup fails with a
# NoMethodError on nil.
#
# @param xml [String] XML text handed to Nokogiri::XML
# @return the result of looking up "Id" on the last ProductOccurrence node
def parse_last_occurrence_id(xml)
  root = Nokogiri::XML(xml).root
  model_file = root.children.find { |node| node.name == "ModelFile" }
  # Walk the children back to front so the first hit is the last occurrence.
  final_occurrence = model_file.children.reverse_each.find { |node| node.name == "ProductOccurrence" }
  final_occurrence["Id"]
end
# Repro driver: starting at 12000 occurrences and growing by 50 per round,
# generate a document, parse it back, and stop at the first size where the Id
# of the last parsed ProductOccurrence differs from the number generated.
# The step has no upper limit, so it only ends via the +break+ below.
12000.step(by: 50) do |occurrence_count|
  xml = generate_xml(occurrence_count)
  last_id = parse_last_occurrence_id(xml)

  unless last_id.to_i == occurrence_count
    tail = xml.split("\n").last(10).join("\n")
    puts
    puts "expected last ProductOccurrence @Id to be #{occurrence_count}, was #{last_id}\n"
    puts "last tags in XML: \n\n#{tail}"
    # Uncomment to keep the offending document for inspection:
    # File.open("sample.xml", "w").write(xml)
    break
  end

  # Progress output: prints the size that will be tried next.
  print "#{occurrence_count + 50}, "
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment