Skip to content

Instantly share code, notes, and snippets.

@greatghoul
Created November 6, 2014 02:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save greatghoul/c2fab58e798a91a736a4 to your computer and use it in GitHub Desktop.
Save greatghoul/c2fab58e798a91a736a4 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import re
def count(html):
    """Return the number of elements in *html* that have no visible content.

    An element is counted when its inner HTML (the concatenated markup of
    its direct children) is empty or consists solely of whitespace.  An
    element that merely wraps another blank element is not counted, because
    the child's own markup makes the parent's inner HTML non-blank.

    Args:
        html: HTML markup to inspect, passed straight to ``BeautifulSoup``.

    Returns:
        The number of blank elements found, as an ``int``.
    """
    # Name the parser explicitly.  Without it BeautifulSoup picks whichever
    # parser happens to be installed (and emits a warning), and parsers differ
    # in the wrapper elements they synthesise -- e.g. html5lib inserts an
    # empty <head>, which would inflate the count.
    soup = BeautifulSoup(html, "html.parser")

    # find_all(True) yields every tag in the document, so no additional check
    # on node.name is required.  A string that is empty after strip() is
    # exactly one that is empty or whitespace-only.
    return sum(
        1
        for node in soup.find_all(True)
        if not "".join(str(child) for child in node.contents).strip()
    )
import unittest
import soup_counter
class SoupCounterTest(unittest.TestCase):
    """Unit tests for ``soup_counter.count``."""

    def test_count(self):
        """Check the count of blank elements in a sample rating widget.

        The eight expected matches in the markup below are: ``div.foo``
        (no content), ``div.bar`` (a newline only), the ``icon-home``
        ``<i>`` (a single space) and the five star ``<i>`` elements.
        """
        # The markup is kept flush-left so the string literal is unchanged.
        html = """
<dl>
<dt class="details-row-7">Overall</dt>
<dd id="c0r11" class=" alternate details-row-7">
<div class="foo"></div>
<div class="bar">
</div>
<div class="foobar"><i class="icon icon-home"> </i></div>
<div class="mobile-headings">Overall</div>
<div class="mobile-value">
<div class="ca-rating-star" data-size="1">
<i class="icon-star icon-1x" style="color: #FF9900"></i>
<i class="icon-star icon-1x" style="color: #FF9900"></i>
<i class="icon-star icon-1x" style="color: #FF9900"></i>
<i class="icon-star icon-1x" style="color: #FF9900"></i>
<i class="icon-star-empty icon-1x" style="color: #FF9900"></i>
</div>
</div>
</dd>
</dl>
"""
        empty_count = soup_counter.count(html)
        # assertEquals was a deprecated alias and no longer exists in
        # Python 3.12+; assertEqual is the supported method.
        self.assertEqual(empty_count, 8)
# Run the test suite only when this file is executed as a script, not when
# it is imported by another module or a test runner.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment