Skip to content

Instantly share code, notes, and snippets.

@KhodeN
Created May 23, 2018 09:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KhodeN/9dd0b52732f36d096f2710705b637401 to your computer and use it in GitHub Desktop.
Save KhodeN/9dd0b52732f36d096f2710705b637401 to your computer and use it in GitHub Desktop.
import re
import unittest
from unittest import TestCase
def striptags(html, *tags):
    """Remove every ``<tag>...</tag>`` element, content included, from *html*.

    Args:
        html: The markup string to clean.
        *tags: Names of the elements to remove (e.g. ``'h1'``).  Names are
            matched literally (regex metacharacters are escaped) and
            case-insensitively.

    Returns:
        *html* with all matching elements removed.  Text outside the removed
        elements is returned unchanged.

    The opening tag may carry attributes (``<h1 class="title">``) and the
    element body may be empty or span several lines.

    Note:
        This is a regex helper, not an HTML parser.  Each match ends at the
        first closing tag, so nested elements with the *same* tag name are
        only partially removed.
    """
    result = html
    for tag in tags:
        # ``(?:\s[^>]*)?`` accepts optional attributes while still rejecting
        # longer tag names that merely start with ``tag`` (``h1`` vs ``h10``).
        pattern = r'<{tag}(?:\s[^>]*)?>.*?</{tag}\s*>'.format(tag=re.escape(tag))
        # DOTALL lets the element body span lines; IGNORECASE because HTML
        # tag names are case-insensitive.
        result = re.sub(pattern, '', result, flags=re.IGNORECASE | re.DOTALL)
    return result
def strip_headers(html):
    """Return *html* with all ``h1``-``h6`` elements removed by ``striptags``."""
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return striptags(html, *heading_tags)
class TempTestCase(TestCase):
    """Unit tests for ``striptags`` and ``strip_headers``."""

    def test_striptags(self):
        """A single ``<x>`` element is removed and the trailing text kept."""
        stripped = striptags('<x>b</x>a', 'x')
        self.assertEqual('a', stripped)

    def test_strip_headers(self):
        """Heading elements are removed; the surrounding text is preserved."""
        # (expected result, input markup) pairs, checked in order.
        cases = (
            ('\nxxx', '<h1>header</h1>\nxxx'),
            ('aaa\nxxx', 'aaa<h2>header</h2>\nxxx'),
            ('aaa\nxxx', '<h1>hea1</h1>aaa<h2>header</h2>\nxxx'),
        )
        for expected, markup in cases:
            self.assertEqual(expected, strip_headers(markup))
if __name__ == '__main__':
    # Executed directly as a script: hand control to the unittest runner.
    unittest.main()
@KhodeN
Copy link
Author

KhodeN commented May 23, 2018

Не работает со сложными заголовками

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment