Skip to content

Instantly share code, notes, and snippets.

@akamch
Created December 4, 2011 13:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akamch/1430166 to your computer and use it in GitHub Desktop.
Save akamch/1430166 to your computer and use it in GitHub Desktop.
HTML5 command-line validation script (http://about.validator.nu/html5check.py), Python 3 version
#!/usr/bin/python3
# Copyright (c) 2007-2008 Mozilla Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import http.client
import os
import sys
import re
import urllib.parse
import string
import gzip
import io
# Captures the trailing alphabetic extension of a file name (the letters after
# the last dot). Names whose final dot is not followed solely by ASCII letters
# do not match at all.
extPat = re.compile(r'^.*\.([A-Za-z]+)$')

# Lower-case file extension -> Content-Type, grouped by MIME type.
extDict = dict.fromkeys(("html", "htm"), "text/html")
extDict.update(dict.fromkeys(("xhtml", "xht"), "application/xhtml+xml"))
extDict["xml"] = "application/xml"
# Command-line state. The flags are kept as 0/1 integers; the rest of the
# script only ever tests them for truthiness.
argv = sys.argv[1:]
forceXml = 0
forceHtml = 0
gnu = 0
errorsOnly = 0
encoding = None
fileName = None
contentType = None
inputHandle = None
service = 'http://html5.validator.nu/'

for arg in argv:
    if arg == '--help':
        for usageLine in (
            '-h : force text/html',
            '-x : force application/xhtml+xml',
            '-g : GNU output',
            '-e : errors only (no info or warnings)',
            '--encoding=foo : declare encoding foo',
            '--service=url : the address of the HTML5 validator',
            'One file argument allowed. Leave out to read from stdin.',
        ):
            print(usageLine)
        sys.exit(0)

    # Long options carrying a value must be tested before the generic "--"
    # check below, which rejects every other long option.
    if arg.startswith("--encoding="):
        encoding = arg[len("--encoding="):]
        continue
    if arg.startswith("--service="):
        service = arg[len("--service="):]
        continue
    if arg.startswith("--"):
        sys.stderr.write('Unknown argument %s.\n' % arg)
        sys.exit(2)

    if arg.startswith("-"):
        # Short flags may be bundled, e.g. "-xg".
        for flag in arg[1:]:
            if flag == 'x':
                forceXml = 1
            elif flag == 'h':
                forceHtml = 1
            elif flag == 'g':
                gnu = 1
            elif flag == 'e':
                errorsOnly = 1
            else:
                sys.stderr.write('Unknown argument %s.\n' % arg)
                sys.exit(3)
        continue

    # Anything else is the (single) input file name.
    if fileName:
        sys.stderr.write('Cannot have more than one input file.\n')
        sys.exit(1)
    fileName = arg
# Work out the Content-Type for the upload: an explicit -x/-h flag wins,
# otherwise it is guessed from the file name extension. Reading from stdin
# gives nothing to guess from, so a flag is mandatory there.
if forceXml and forceHtml:
    sys.stderr.write('Cannot force HTML and XHTML at the same time.\n')
    sys.exit(2)

if forceXml:
    contentType = 'application/xhtml+xml'
elif forceHtml:
    contentType = 'text/html'
elif not fileName:
    sys.stderr.write('Need to force HTML or XHTML when reading from stdin.\n')
    sys.exit(4)
else:
    m = extPat.match(fileName)
    if not m:
        sys.stderr.write('Could not extract a filename extension. Please force the type.\n')
        sys.exit(6)
    # extPat only captures ASCII letters, so lower() is a plain ASCII fold.
    ext = m.group(1).lower()
    if ext not in extDict:
        sys.stderr.write('Unable to guess Content-Type from file name. Please force the type.\n')
        sys.exit(3)
    contentType = extDict[ext]

# A declared encoding is appended as the charset parameter.
if encoding:
    contentType = '%s; charset=%s' % (contentType, encoding)
# Read the whole document as bytes, from the named file or from stdin.
if fileName:
    # The context manager closes the file as soon as it has been read.
    with open(fileName, "rb") as inputHandle:
        data = inputHandle.read()
else:
    # sys.stdin is a text stream whose read() returns str, which GzipFile
    # cannot write; its .buffer attribute is the underlying binary stream.
    inputHandle = sys.stdin.buffer
    data = inputHandle.read()

# Gzip the document in memory. The GzipFile must be closed (here, by leaving
# the with-block) before getvalue() so that the gzip trailer is written.
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as gzipper:
    gzipper.write(data)
gzippeddata = buf.getvalue()
buf.close()
# POST the gzipped document to the validator service, following at most ten
# 301/302/307 redirects and asking the user to confirm each one.
connection = None
response = None
status = 302  # seeded with a redirect status so the loop body runs once
redirectCount = 0

# Select the output format (and optionally errors only) via the query string.
url = service + ('?out=gnu' if gnu else '?out=text')
if errorsOnly:
    url += '&level=error'

while status in (301, 302, 307) and redirectCount < 10:
    if redirectCount > 0:
        location = response.getheader('Location')
        if not location:
            sys.stderr.write('Redirect response without a Location header.\n')
            sys.exit(7)
        # Location may be relative; resolve it against the URL just requested.
        url = urllib.parse.urljoin(url, location)
    parsed = urllib.parse.urlsplit(url)
    if parsed[0] != 'http':
        sys.stderr.write('URI scheme %s not supported.\n' % parsed[0])
        sys.exit(7)
    if redirectCount > 0:
        connection.close()  # previous connection
        print('Redirecting to %s' % url)
        print('Please press enter to continue or type "stop" followed by enter to stop.')
        # NOTE: if the document itself was read from stdin, stdin is already
        # exhausted here and input() raises EOFError.
        if input() != "":
            sys.exit(0)
    connection = http.client.HTTPConnection(parsed[1])
    connection.connect()
    # An empty path (e.g. service "http://host") would give an invalid
    # request line, so fall back to "/".
    connection.putrequest(
        "POST",
        "%s?%s" % (parsed[2] or '/', parsed[3]),
        skip_accept_encoding=1,
    )
    connection.putheader("Accept-Encoding", 'gzip')
    connection.putheader("Content-Type", contentType)
    connection.putheader("Content-Encoding", 'gzip')
    # Header values are sent as strings; convert the length explicitly.
    connection.putheader("Content-Length", str(len(gzippeddata)))
    connection.endheaders()
    connection.send(gzippeddata)
    response = connection.getresponse()
    status = response.status
    redirectCount += 1
# Report a non-200 final status, otherwise print the validator's output.
if status != 200:
    sys.stderr.write('%s %s\n' % (status, response.reason))
    sys.exit(5)

# Transparently decompress a gzip-encoded response body.
if response.getheader('Content-Encoding', 'identity').lower() == 'gzip':
    response = gzip.GzipFile(fileobj=io.BytesIO(response.read()))

if fileName and gnu:
    # GNU output: prefix every message line with the quoted file name.
    quotedName = '"%s"' % fileName.replace('"', '\\042')
    for line in response:
        sys.stdout.write(quotedName)
        # Iterating the response yields bytes; decode before writing to the
        # text-mode stdout, as the non-GNU branch below already does.
        sys.stdout.write(line.decode('utf-8'))
else:
    sys.stdout.write(response.read().decode('utf-8'))

connection.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment