Chuck Bearden said:
> My impression is that if I want to make several requests over a single,
> persistent HTTP 1.1 connection, I must use httplib rather than urllib2
> or ClientCookies. Is that correct?
Yes.
> To put it another way, can I have cookie support over persistent
> connections without having to code the cookie support myself?
Yes. You can use httplib direct, and use a couple of little functions
to link it up to ClientCookie. The request and response interfaces
required are well-documented. Here is a clumsy cobbled-together hack,
for HTTP only, which may even work ;-) No automatic redirection
handling, proxy support, etc. The cookies bit is simple (thanks to
ClientCookie); it's the rest that's messy.
IIUC, the only problem is that urllib.addbase expects a readline
method (as do some users of urllib2, no doubt), which
httplib.HTTPResponse doesn't provide. So,
urllib2.AbstractHTTPHandler.do_open() passes urllib.addinfourl the
underlying socket object instead (the fp attribute of HTTPResponse),
which does have a readline() method. In turn, that means that
HTTPResponse's knowledge of the HTTP protocol is lost, and you can't
do more than one request in a single connection. So, the code below
passes the HTTPResponse itself to addinfourl, rather than just its fp
attribute. I suppose the solution is to add a readline method to
HTTPResponse (and whichever other methods HTTPResponse.fp has and
addbase passes through).
import httplib
import ClientCookie
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object ``fp`` and re-exports its ``read`` method,
    plus whichever of ``readline``, ``readlines``, ``fileno``,
    ``__iter__`` and ``next`` the object actually provides, as
    attributes of the wrapper.
    """

    def __init__(self, fp):
        """Wrap *fp*; ``fp.read`` is required, everything else is optional."""
        self.fp = fp
        self.read = self.fp.read
        # Each optional method is guarded by a check for that *same*
        # method: objects such as httplib.HTTPResponse may lack some of
        # them, and testing "readlines" before binding "readline" would
        # either hide an existing readline or raise AttributeError.
        if hasattr(self.fp, "readline"):
            self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            # next() is only forwarded together with __iter__.
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Disable the forwarded methods and close the wrapped object."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """File wrapper that fires a callback after it has been closed."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap *fp* and remember *closehook* plus its call arguments."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the wrapped file, then run the hook if one is pending."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Drop the references so a repeated close() does not re-run the hook.
        self.closehook = self.hookargs = None
class addinfo(addbase):
    """File wrapper that also carries a headers object."""

    def __init__(self, fp, headers):
        """Wrap *fp* and store *headers* for later retrieval."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given at construction time."""
        return self.headers
class addinfourl(addbase):
    """File wrapper that carries both a headers object and a URL."""

    def __init__(self, fp, headers, url):
        """Wrap *fp* and store *headers* and *url* for later retrieval."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given at construction time."""
        return self.headers

    def geturl(self):
        """Return the URL given at construction time."""
        return self.url
class SimpleURLOpener:
    """Cookie-aware opener that sends every request over one connection.

    The caller opens a single httplib.HTTPConnection with
    open_connection(), issues any number of open() calls against that
    host, and finally calls close_connection().  Cookies are added to
    requests and extracted from responses through the cookie jar passed
    to the constructor.  HTTP only; no redirection or proxy handling.
    """

    def __init__(self, cookiejar, debuglevel=0):
        """Store the cookie jar and debug level; no connection is opened."""
        self.cj = cookiejar
        self._debuglevel = debuglevel
        self.conn = None
        self.host = None

    def open_connection(self, host):
        """Create the HTTPConnection to *host*; only one may be active."""
        assert self.conn is self.host is None
        self.host = host
        self.conn = httplib.HTTPConnection(host)

    def close_connection(self):
        """Close the active connection, if any, and forget the host."""
        if self.conn is not None:
            self.conn.close()
        self.conn = self.host = None

    def set_http_debuglevel(self, level):
        """Set the debug level applied to the connection on the next open()."""
        self._debuglevel = level

    def add_cookie_header(self, request):
        """Let the cookie jar add its Cookie header(s) to *request*."""
        self.cj.add_cookie_header(request)

    def extract_cookies(self, response, request):
        """Let the cookie jar pick up cookies set by *response*."""
        self.cj.extract_cookies(response, request)

    def open(self, url_or_request):
        """Send one request over the open connection and return the response.

        *url_or_request* is either a ClientCookie.Request or something
        ClientCookie.Request() accepts as its first argument.  The
        return value is an addinfourl wrapping the HTTPResponse, with
        ``code`` and ``msg`` attributes set.
        """
        if isinstance(url_or_request, ClientCookie.Request):
            req = url_or_request
        else:
            req = ClientCookie.Request(url_or_request)
        self.add_cookie_header(req)
        self._do_request(req)
        r = self._do_open(req)
        self.extract_cookies(r, req)
        return r

    def _do_request(self, request):
        """Fill in the default headers *request* is still missing."""
        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', self.host)

    def _do_open(self, req):
        """Send *req* on the current connection and wrap the response.

        Raises URLError if the request names no host or if the socket
        layer fails while sending the request or reading the response.
        """
        # Imported locally, like splittype/splithost, so this class does
        # not depend on module-level imports it never declared.
        import socket
        from urllib import splittype, splithost
        from urllib2 import URLError
        # bleah
        host = req.get_host()
        if not host:
            raise URLError('no host given')
        scheme, sel = splittype(req.get_selector())
        sel_host, sel_path = splithost(sel)
        # The request must target the host this connection was opened to.
        assert self.host == (sel_host or host)
        self.conn.set_debuglevel(self._debuglevel)
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        try:
            self.conn.request(req.get_method(), req.get_selector(),
                              req.data, headers)
            r = self.conn.getresponse()
        except socket.error as err:  # XXX what error?
            raise URLError(err)
        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.  The HTTPResponse itself (not its
        # fp attribute) is wrapped, so its knowledge of the HTTP
        # protocol is kept for further requests on this connection.
        resp = addinfourl(r, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
# Demo: two requests (one plain URL, one Request with an extra header)
# sent over a single persistent connection, sharing one cookie jar.
opener = SimpleURLOpener(ClientCookie.CookieJar())
opener.open_connection("python.org")
try:
    r1 = opener.open("http://python.org/index.html")
    print("***********************************************************")
    print(r1.read())
    req = ClientCookie.Request("http://python.org/download",
                               headers={"Foo-Bar": "baz"})
    r2 = opener.open(req)
    print("***********************************************************")
    print(r2.read())
finally:
    # Release the connection even if one of the requests fails.
    opener.close_connection()
John