This is a small piece of code I wrote to intercept and reject an HTTP or HTTPS request made through urllib.urlopen when the Content-Type header of the response is not in a list of accepted content types.

I'm sure somebody might find a use for this.

This snippet creates a custom URLopener that overrides the open_http and open_https methods. Each override checks the MIME type of the response and halts the request if it is a MIME type you do not accept.

import urllib
# Check for SSL: record whether the ssl module can be imported, so that
# HTTPS support is only wired up when it is actually available.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
class ContentTypeNotAcceptedException(Exception):
    """Raised when a response's Content-Type is not an accepted type."""
class StrictContentTypeURLopener(urllib.FancyURLopener):
    """URL opener that rejects responses with an unaccepted Content-Type.

    Every HTTP (and, when ssl is available, HTTPS) response is checked
    against ``ok_content_types`` before it is handed back to the caller.
    Uses the Python 2 ``urllib`` API (``FancyURLopener``, ``addinfourl``).
    """

    version = "Mozilla Firefox/3.6.1"
    # Set your accepted content types to this member variable.
    ok_content_types = ["text/html", "application/xhtml+xml", "application/xml"]

    def __init__(self, *args, **kwargs):
        """Initialise the opener and advertise the accepted types.

        All arguments are forwarded unchanged to ``FancyURLopener``.
        """
        urllib.FancyURLopener.__init__(self, *args, **kwargs)
        # Tell the server up front which types we are willing to receive.
        self.addheader("Accept", ",".join(self.ok_content_types))

    def check_content_type(self, o):
        """Raise if the content type of response ``o`` is not accepted.

        Raises:
            ContentTypeNotAcceptedException: the type reported by
                ``get_content_type`` (possibly ``None``) is not listed in
                ``ok_content_types``.
        """
        content_type = self.get_content_type(o)
        if content_type not in self.ok_content_types:
            raise ContentTypeNotAcceptedException(
                "Content-Type: %s. Allowed types: %s" % (
                    content_type, ", ".join(self.ok_content_types)
                )
            )

    def get_content_type(self, o):
        """Return the bare MIME type of response ``o``, or ``None``.

        Parameters such as ``; charset=utf-8`` are dropped. ``None`` is
        returned when ``o`` is not an ``addinfourl`` response or carries no
        Content-Type header.
        """
        if o.__class__.__name__ != urllib.addinfourl.__name__:
            return None
        # NOTE(review): assumes o.info() returns a Python 2 message object
        # exposing getheader() -- confirm against the urllib version in use.
        header = o.info().getheader("Content-Type")
        if header is None:
            return None
        # Keep only the part before the first ';' (the media type itself).
        return header.split(";", 1)[0].strip()

    def open_http(self, url, data=None):
        """Open ``url`` over HTTP and enforce the content-type whitelist."""
        o = urllib.FancyURLopener.open_http(self, url, data)
        self.check_content_type(o)
        return o

    # open_https is only defined when the ssl module could be imported.
    if _have_ssl:
        def open_https(self, url, data=None):
            """Open ``url`` over HTTPS and enforce the whitelist."""
            o = urllib.FancyURLopener.open_https(self, url, data)
            self.check_content_type(o)
            return o
# Enable the use of our URLopener for urllib functions
urllib._urlopener = StrictContentTypeURLopener()

# Only perform the sample request when run as a script, so that importing
# this module does not trigger network I/O.
if __name__ == "__main__":
    # Make a request, will raise ContentTypeNotAcceptedException if the
    # request has the wrong content type.
    url = ""  # placeholder: set this to the URL you want to fetch
    sock = urllib.urlopen(url)