This is a small piece of code I wrote to intercept and discard an HTTP or HTTPS response from urllib.urlopen if the Content-Type header on the response is not within a list of accepted content types.

I'm sure somebody might find a use for this.

This snippet creates a custom URLopener that overrides the open_http and open_https methods, checks the MIME type of the response, and aborts the request if the response is of a MIME type you do not accept.

import urllib
 
# Check for SSL support: _have_ssl records whether the ssl module can be
# imported. Only ImportError is treated as "no SSL"; a bare except would also
# swallow KeyboardInterrupt/SystemExit and hide unrelated failures.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
 
class ContentTypeNotAcceptedException(Exception):
    """Raised when a response's Content-Type is not an accepted type."""
 
class StrictContentTypeURLopener(urllib.FancyURLopener):
    """FancyURLopener that rejects responses with an unaccepted Content-Type.

    The HTTP (and, when SSL is available, HTTPS) open methods are overridden
    so that every response is checked against ``ok_content_types`` before it
    is handed back to the caller. A response whose media type is not in that
    list causes ContentTypeNotAcceptedException to be raised.
    """

    # Value urllib's URLopener uses to identify the client (User-Agent),
    # per the urllib documentation.
    version = "Mozilla Firefox/3.6.1"

    # Set your accepted content types to this member variable.
    ok_content_types = ["text/html", "application/xhtml+xml", "application/xml"]

    def __init__(self, *args, **kwargs):
        """Initialise the opener and advertise the accepted types.

        All arguments are forwarded unchanged to FancyURLopener.__init__.
        An ``Accept`` header listing ``ok_content_types`` is added so the
        request states which types are wanted.
        """
        # Explicit base-class call (rather than super()) kept from the
        # original code.
        urllib.FancyURLopener.__init__(self, *args, **kwargs)
        self.addheader("Accept", ",".join(self.ok_content_types))

    def check_content_type(self, o):
        """Raise if the Content-Type of response *o* is not accepted.

        Raises:
            ContentTypeNotAcceptedException: when the media type returned by
                get_content_type() is not in ``ok_content_types`` (this
                includes the case where no type could be determined).
        """
        # Resolve the type once and reuse it for both the test and the message.
        content_type = self.get_content_type(o)
        if content_type not in self.ok_content_types:
            raise ContentTypeNotAcceptedException(
                "Content-Type: %s. Allowed types: %s" % (
                    content_type, ", ".join(self.ok_content_types)
                )
            )

    def get_content_type(self, o):
        """Return the bare media type of response *o*, or None.

        Parameters such as ``; charset=utf-8`` are dropped and surrounding
        whitespace is stripped. None is returned when *o* is not a
        urllib.addinfourl, when it does not expose ``info().getheader``, or
        when the response carries no Content-Type header.
        """
        if not isinstance(o, urllib.addinfourl):
            return None
        try:
            header = o.info().getheader("Content-Type")
        except AttributeError:
            # Best effort: an object without the expected header API is
            # treated as having no content type.
            return None
        if header is None:
            return None
        # Plain string splitting avoids depending on the `re` module, which
        # this file never imports.
        return header.split(";", 1)[0].strip()

    def open_http(self, url, data=None):
        """Open *url* over HTTP and validate the response's Content-Type.

        Returns the object produced by FancyURLopener.open_http; raises
        ContentTypeNotAcceptedException if its type is not accepted.
        """
        o = urllib.FancyURLopener.open_http(self, url, data)
        self.check_content_type(o)
        return o

    # open_https is only overridden when the ssl module imported successfully.
    if _have_ssl:
        def open_https(self, url, data=None):
            """Open *url* over HTTPS and validate the response's Content-Type.

            Returns the object produced by FancyURLopener.open_https; raises
            ContentTypeNotAcceptedException if its type is not accepted.
            """
            o = urllib.FancyURLopener.open_https(self, url, data)
            # Fixed: the original called the non-existent check_contenttype.
            self.check_content_type(o)
            return o
 
 
# Enable the use of our URLopener for urllib functions.
# This assigns to urllib's private module-level `_urlopener` attribute, so it
# affects the whole process, not just this file.
# NOTE(review): presumably urllib.urlopen() picks this instance up — confirm
# against the urllib version in use.
urllib._urlopener = StrictContentTypeURLopener()
 
# Make a request, will raise ContentTypeNotAcceptedException if the request has the wrong content type.
# NOTE(review): this runs at import time and the returned object is not closed
# in the code visible here — confirm the caller closes `sock` when done.
url = "http://neverfear.org"
sock = urllib.urlopen(url)