This is a little piece of code I wrote to intercept and reject an HTTP or HTTPS response from urllib.urlopen if the Content-Type header on the response is not within a list of accepted content types.
I'm sure somebody might find a use for this.
This snippet creates a custom URLopener subclass that overrides the open_http and open_https methods, checks the MIME type of the response, and halts the request if the response is of a MIME type you do not accept.
import re
import urllib

# Check for SSL support: open_https is only overridden when the ssl module
# can be imported.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True


class ContentTypeNotAcceptedException(Exception):
    """Raised when a response's Content-Type is not an accepted type."""
    pass


class StrictContentTypeURLopener(urllib.FancyURLopener):
    """URL opener that rejects responses with an unaccepted Content-Type.

    The accepted types are advertised to the server in an ``Accept`` request
    header, and every HTTP/HTTPS response is checked against them before it
    is handed back to the caller.
    """

    version = "Mozilla Firefox/3.6.1"

    # Set your accepted content types to this member variable.
    ok_content_types = ["text/html", "application/xhtml+xml", "application/xml"]

    def __init__(self, *args, **kwargs):
        """Create the opener and register the ``Accept`` request header."""
        # The base class is called explicitly, matching the original code.
        urllib.FancyURLopener.__init__(self, *args, **kwargs)
        self.addheader("Accept", ",".join(self.ok_content_types))

    def check_content_type(self, o):
        """Raise ContentTypeNotAcceptedException if *o* has a bad type.

        *o* is the response object returned by ``open_http``/``open_https``.
        A response whose type cannot be determined is rejected as well.
        """
        content_type = self.get_content_type(o)
        # Media types are case-insensitive, so compare in lower case.
        accepted = [t.lower() for t in self.ok_content_types]
        if content_type not in accepted:
            raise ContentTypeNotAcceptedException(
                "Content-Type: %s. Allowed types: %s" % (
                    content_type,
                    ", ".join(self.ok_content_types)
                )
            )

    def get_content_type(self, o):
        """Return the bare media type of response *o*, or None.

        Parameters such as ``; charset=utf-8`` are dropped, surrounding
        whitespace is stripped and the result is lower-cased. None is
        returned when *o* is not a ``urllib.addinfourl`` or when it carries
        no Content-Type header.
        """
        if not isinstance(o, urllib.addinfourl):
            return None
        header = o.info().getheader("Content-Type")
        if header is None:
            return None
        # Keep only the part before the first ";" (the media type itself).
        return re.sub(";.*", "", header).strip().lower()

    def _checked(self, o):
        """Return *o* if its type is accepted; otherwise close it and raise."""
        try:
            self.check_content_type(o)
        except ContentTypeNotAcceptedException:
            # Do not leak the connection of a response we refuse to return.
            close = getattr(o, "close", None)
            if close is not None:
                close()
            raise
        return o

    def open_http(self, url, data=None):
        """Open *url* over HTTP and enforce the accepted content types."""
        o = urllib.FancyURLopener.open_http(self, url, data)
        return self._checked(o)

    if _have_ssl:
        def open_https(self, url, data=None):
            """Open *url* over HTTPS and enforce the accepted content types."""
            o = urllib.FancyURLopener.open_https(self, url, data)
            return self._checked(o)


# Enable the use of our URLopener for urllib functions
urllib._urlopener = StrictContentTypeURLopener()

# Make a request, will raise ContentTypeNotAcceptedException if the request
# has the wrong content type.
url = "http://neverfear.org"
sock = urllib.urlopen(url)