-
-
Save freshtonic/6974873 to your computer and use it in GitHub Desktop.
require 'spec_helper' | |
# Serializes a parsed Nokogiri HTML node.
#
# Documents and fragments are handled differently: a document is serialized
# with an explicit UTF-8 encoding and a save-option chosen by +format+, while
# a fragment is serialized with its default arguments.
#
# @param thing [Nokogiri::HTML::Document, Nokogiri::HTML::DocumentFragment]
#   the parsed node to serialize.
# @param format [Symbol] name of the method invoked on +config.format+ inside
#   the serialize block (the callers here pass +:as_html+ or +:as_xml+).
#   Ignored for fragments.
# @return [Object, nil] whatever +thing.serialize+ returns, or +nil+ when
#   +thing+ is neither a document nor a fragment.
def serialize(thing, format = :as_html)
  case thing
  when Nokogiri::HTML::Document
    # Serialize the object that was passed in. This used to call `document`,
    # which only resolved when the caller's scope happened to define a
    # `document` method (e.g. an RSpec `let`) and ignored `thing` entirely.
    thing.serialize(:encoding => 'UTF-8') do |config|
      # public_send: the format name comes from the caller, so do not allow
      # it to reach private methods.
      config.format.public_send(format)
    end
  when Nokogiri::HTML::DocumentFragment
    thing.serialize
  end
end
# Checks that two <img> tags written on separate lines inside <pre><code>
# are still on separate lines after the markup is parsed and re-serialized,
# for both a fragment and a full document (as XML and as HTML).
describe "Nokogiri significant whitespace preservation in <pre> and <code> blocks" do
  # Markup under test: the two <img> tags are separated only by whitespace.
  let(:html) do
    "
<div>
<pre>
<code>
<img src='about:blank'>
<img src='about:blank'>
</code>
</pre>
</div>
"
  end

  let(:document) { Nokogiri::HTML html }
  let(:fragment) { Nokogiri::HTML.fragment html }

  # Number of lines in the serialized markup that mention an <img> tag.
  # Two means each tag kept its own line; one means they were merged.
  def img_line_count(markup)
    markup.lines.count { |line| line =~ /img/ }
  end

  # The descriptions below used to say "on the same line", which contradicted
  # the assertions: every example expects two separate lines.
  context 'use a fragment (serialize with default args)' do
    it "should output the image tags on separate lines" do
      img_line_count(serialize(fragment)).should == 2
    end
  end

  context 'use a document (serialize as XML)' do
    it "should output the image tags on separate lines" do
      img_line_count(serialize(document, :as_xml)).should == 2
    end
  end

  context 'use a document (serialize as HTML)' do
    it "should output the image tags on separate lines" do
      img_line_count(serialize(document, :as_html)).should == 2
    end
  end
end
FFF | |
Failures: | |
1) Nokogiri significant whitespace preservation in <pre> and <code> blocks use a document (serialize as XML) should output the image tags on the same line | |
Failure/Error: serialize(fragment, :as_xml).lines.count{|l| l =~ /img/}.should == 2 | |
expected: 2 | |
got: 1 (using ==) | |
# ./spec/ce/whitespace_spec.rb:41:in `block (3 levels) in <top (required)>' | |
2) Nokogiri significant whitespace preservation in <pre> and <code> blocks use a document (serialize as HTML) should output the image tags on the same line | |
Failure/Error: serialize(fragment, :as_html).lines.count{|l| l =~ /img/}.should == 2 | |
expected: 2 | |
got: 1 (using ==) | |
# ./spec/ce/whitespace_spec.rb:47:in `block (3 levels) in <top (required)>' | |
3) Nokogiri significant whitespace preservation in <pre> and <code> blocks use a fragment (serialize with default args) should output the image tags on the same line | |
Failure/Error: serialize(fragment).lines.count{|l| l =~ /img/}.should == 2 | |
expected: 2 | |
got: 1 (using ==) | |
# ./spec/ce/whitespace_spec.rb:35:in `block (3 levels) in <top (required)>' | |
Finished in 0.0541 seconds | |
3 examples, 3 failures | |
Failed examples: | |
rspec ./spec/ce/whitespace_spec.rb:40 # Nokogiri significant whitespace preservation in <pre> and <code> blocks use a document (serialize as XML) should output the image tags on the same line | |
rspec ./spec/ce/whitespace_spec.rb:46 # Nokogiri significant whitespace preservation in <pre> and <code> blocks use a document (serialize as HTML) should output the image tags on the same line | |
rspec ./spec/ce/whitespace_spec.rb:34 # Nokogiri significant whitespace preservation in <pre> and <code> blocks use a fragment (serialize with default args) should output the image tags on the same line |
Maybe change the required libs so it says:
require 'rspec'
require 'nokogiri'
Needing Rails for this sucks :) I'll see about other people taking a look at this gist.
Let me see if I can organise my thoughts here.
The immediate problem we're having is that Nokogiri adds newlines around tags, namely `<span>`, when outputting HTML, which messes with `<pre>` tags. Once that is solved we'll be good, and I believe everything else will be OK too, since I distinctly recall the content extractor doing a good job bookmarking code from sites like css-tricks.com.
So am I correct in assuming that finding how not to pretty-print when outputting stuff with Nokogiri would solve this?
No. The problem is tags with nothing but whitespace between them get put on one line.
Well, maybe it's a mixture of pretty-printing and stripping new lines. Not sure. I will continue on this tonight.
It's weird how #serialize takes args and a block versus no args depending on whether we have a document or a fragment. Stinky API.