Skip to content

Instantly share code, notes, and snippets.

@simonmichael
Created November 24, 2009 23:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonmichael/242321 to your computer and use it in GitHub Desktop.
Save simonmichael/242321 to your computer and use it in GitHub Desktop.
##############################################################################
#
# Copyright (c) 2003 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""A utility module for content-type handling.
$Id$
"""
import string
import re
import os.path
import mimetypes
find_binary = re.compile('[\0-\7]').search
def text_type(s):
"""Given an unnamed piece of text, try to guess its content type.
Detects HTML, XML, and plain text. Returns a MIME type string
such as 'text/html'.
"""
# at least the maximum length of any tags we look for
iMAXLEN=14
if len(s) < iMAXLEN: return 'text/plain'
i = 0
while s[i] in string.whitespace:
i += 1
s2 = s[i : i+iMAXLEN]
s = s2.lower()
if s.startswith('<html>'):
return 'text/html'
if s.startswith('<!doctype html'):
return 'text/html'
# what about encodings??
if s2.startswith('<?xml'):
return 'text/xml'
return 'text/plain'
def guess_content_type(name='', body='', default=None):
"""Given a named piece of content, try to guess its content type.
The implementation relies on the 'mimetypes' standard Python module,
the 'text_type' function also defined in this module, and a simple
heuristic for detecting binary data.
Returns a MIME type string such as "text/html".
"""
# Attempt to determine the content type (and possibly
# content-encoding) based on an an object's name and
# entity body.
type, enc = mimetypes.guess_type(name)
if type is None:
if body:
if find_binary(body) is not None:
type = default or 'application/octet-stream'
else:
type = (default or text_type(body)
or 'text/x-unknown-content-type')
else:
type = default or 'text/x-unknown-content-type'
return type.lower(), enc and enc.lower() or None
def add_files(filenames):
"""Add the names of MIME type map files to the standard 'mimetypes' module.
MIME type map files are used for detecting the MIME type of some content
based on the content's filename extension.
The files should be formatted similarly to the 'mime.types' file
included in this package. Each line specifies a MIME type and the
file extensions that imply that MIME type. Here are some sample lines::
text/css css
text/plain bat c h pl ksh
text/x-vcard vcf
"""
# Make sure the additional files are either loaded or scheduled to
# be loaded:
if mimetypes.inited:
# Re-initialize the mimetypes module, loading additional files
mimetypes.init(filenames)
else:
# Tell the mimetypes module about the additional files so
# when it is initialized, it will pick up all of them, in
# the right order.
mimetypes.knownfiles.extend(filenames)
# Provide definitions shipped as part of Zope:
here = os.path.dirname(os.path.abspath(__file__))
add_files([os.path.join(here, "mime.types")])
if __name__ == '__main__':
items = mimetypes.types_map.items()
items.sort()
for item in items: print "%s:\t%s" % item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment