Skip to content

Instantly share code, notes, and snippets.

@JakubOrsula
Last active April 1, 2021 10:42
Show Gist options
  • Save JakubOrsula/bd4f6370ca4efa61dee30dcd94566883 to your computer and use it in GitHub Desktop.
Save JakubOrsula/bd4f6370ca4efa61dee30dcd94566883 to your computer and use it in GitHub Desktop.
import os
import re
from bs4 import BeautifulSoup, NavigableString
# Root directory scanned by the driver loop at the bottom of this file:
# every entry directly inside it is walked for an .html file to validate.
GIT_DIR = '/home/jakub/github/pb138/'
# Beautiful Soup tests a class regex against each individual class token, so
# anchoring the pattern avoids false hits such as "arrow" or "collapse".
_ROW_CLASS = re.compile(r'^row$')
_COL_CLASS = re.compile(r'^col(-|$)')


def validate(url):
    """Check one HTML file against the submission rules and print findings.

    Nothing is returned; every violation is reported on stdout with a
    ``[ FAIL ]`` or ``[ WARN ]`` prefix.

    Checks performed, in order:
      * no forbidden tags (tables, headings, ``strong``, ``br``, ...);
      * exactly one ``<body>`` (validation stops here if that fails);
      * no ``style``/``width``/``height`` attribute on direct children
        of ``<body>``;
      * every ``<img>`` has an ``alt`` text longer than 3 characters;
      * every row (``<row>`` or ``<div class="row">``) contains at least
        one column (``<col>`` or ``<div class="col...">``);
      * every ``<input>`` has exactly one ``<form>`` ancestor.

    Args:
        url: Filesystem path of the HTML file to validate.
    """
    # Context manager so the file handle is closed even if parsing raises.
    with open(url) as page:
        soup = BeautifulSoup(page.read(), 'html.parser')

    forbidden_tags = soup.find_all(['table', 'td', 'tr', 'strong', 'small', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7', 'svg', 'br', 'style'])
    if forbidden_tags:
        print('[ FAIL ] submission contains forbidden tags', forbidden_tags)

    if len(soup.find_all('body')) != 1:
        print('[ FAIL] I expected to find exactly one body tag')
        return

    forbidden_attrs = ['style', 'width', 'height']
    # NOTE(review): only the direct children of <body> are inspected, not
    # deeper descendants -- confirm whether that is intentional.
    for tag in soup.body.children:
        if isinstance(tag, NavigableString):
            # Text nodes carry no attributes.
            continue
        for attr in forbidden_attrs:
            if tag.has_attr(attr):
                print(f'[ FAIL ] {attr} attrs are forbidden: {tag}')

    for img in soup.find_all('img'):
        if not img.has_attr('alt'):
            print('[ FAIL ]Imgs must have alt text', img)
        elif len(img['alt']) <= 3:
            print('[ WARN ] ALt tag should be something meaningful', img)

    # The class filter is passed via ``class_``; the previous set literal
    # ``{'class', regex}`` was not an attribute dict and its regex demanded a
    # literal '=' in the class value, so class-based rows were never matched.
    rows = soup.find_all('row') + soup.find_all('div', class_=_ROW_CLASS)
    for row in rows:
        cols = row.find_all('col') + row.find_all('div', class_=_COL_CLASS)
        if not cols:
            print('[ WARN ]There are rows without cols!', row)

    for inp in soup.find_all('input'):
        if len(inp.find_parents('form')) != 1:
            print('Every input should be wrapped in one form', inp)
# Driver: for every entry under GIT_DIR, validate the first .html file found
# while walking it, report any further .html files as duplicates, and report
# entries that contain no .html file at all.
for repo_name in os.listdir(GIT_DIR):
    html_seen = False
    repo_path = os.path.join(GIT_DIR, repo_name)
    for current_dir, _subdirs, filenames in os.walk(repo_path):
        for filename in filenames:
            if not filename.endswith('.html'):
                continue
            html_path = os.path.join(current_dir, filename)
            if html_seen:
                # Only the first HTML file of an entry gets validated.
                print('Duplicate html file', html_path)
                continue
            html_seen = True
            print('~~~~~~')
            print(repo_name)
            print('~~~~~~')
            validate(html_path)
    if not html_seen:
        print('~~~~~\nHtml file for ' + repo_name + ' not found!\n~~~~~~')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment