-
-
Save teward/23ee127167851185f189cd22391da702 to your computer and use it in GitHub Desktop.
SmokeDetector Testing for zero-length or whitespace-only links.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import bs4 | |
# This is the function we're testing.
def zerolength_link_test(post_contents: str) -> bool:
    """Report whether the post contains a link with no visible text.

    Parses ``post_contents`` as HTML and inspects every ``<a>`` element.
    A link counts as zero-length when its text (nested tags stripped) is
    empty or consists solely of whitespace and/or unprintable characters.
    Links whose markup contains an ``<img `` tag are skipped.

    Args:
        post_contents: HTML body of the post to scan.

    Returns:
        True if at least one zero-length link is found, otherwise False.
    """
    soup = bs4.BeautifulSoup(post_contents, 'html.parser')
    for link in soup.find_all('a'):
        if '<img ' in str(link):
            # Image embeds in links are not zero-length for this case.
            continue
        # One visible character is enough to make the link non-empty.
        # Checking per character (instead of ``not text.isprintable()`` on
        # the whole string) avoids flagging ordinary link text that merely
        # contains a line break or tab, e.g. "click\nhere".  An empty
        # string yields no characters, so it is flagged as well.
        has_visible_char = any(
            ch.isprintable() and not ch.isspace() for ch in link.text
        )
        if not has_visible_char:
            return True
    return False
class ZeroLengthLinkTests(unittest.TestCase):
    """Exercise zerolength_link_test against sample post bodies."""

    def test_whitespace_only_link(self):
        """A link whose only text is a single space is flagged."""
        post_html = (
            'This is a test at <a href="https://google.com"> </a> whitespace-only '
            'links which are effectively zero-length.'
        )
        flagged = zerolength_link_test(post_html)
        self.assertTrue(flagged)

    def test_zero_length_link_nonobfuscated(self):
        """A link with no content at all is flagged."""
        post_html = (
            "This is a test of <a href='google.com'></a> actual zero-length link text."
        )
        flagged = zerolength_link_test(post_html)
        self.assertTrue(flagged)

    def test_zero_length_link_tag_obfuscation(self):
        """An empty link hidden behind an empty <em> tag is flagged."""
        post_html = (
            "This is a test of <a href='google.com'><em></em></a> z"
            "ero length links obfuscated by tags."
        )
        flagged = zerolength_link_test(post_html)
        self.assertTrue(flagged)

    def test_unprintable_only_link(self):
        """A link made up solely of control characters is flagged."""
        post_html = (
            "This one has unprintable characters <a href='google.com'>\t\f\r\n</a> in the link."
        )
        flagged = zerolength_link_test(post_html)
        self.assertTrue(flagged)

    def test_not_zero_length_link(self):
        """A link with ordinary visible text is not flagged."""
        post_html = (
            "This is a test of <a href='https://google.com'>an actual link to "
            "Google</a> that is not Zero Length."
        )
        flagged = zerolength_link_test(post_html)
        self.assertFalse(flagged)

    def test_whitespace_only_link_tag_obfuscation(self):
        """A whitespace-only link wrapped in a <span> is flagged."""
        post_html = (
            "This is a test of a whitespace only link <a href='google.com'><span> </span></a>"
            " obfuscated with span tags."
        )
        flagged = zerolength_link_test(post_html)
        self.assertTrue(flagged)
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment