Carl Waldbieser
I have written a simple web proxy using BaseHTTPRequestHandler from the
Python standard library. Right now, all it does is log the web traffic
passing through it to the console. I have been testing it by setting my
browser's proxy setting to localhost:8077 and browsing to various web
sites. Some sites work fine (e.g. www.python.org), but others seem to
stall indefinitely (e.g. www.google.com). If I set the same browser to
connect directly to the Internet, the same sites load almost immediately.
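For what it's worth, a short test script along these lines (just a sketch
that points urllib2 at the proxy on port 8077 and fetches one of the
problem sites) should show the same stall without involving a browser:

    import urllib2

    # Route one request through the local proxy and fetch a problem site.
    proxy = urllib2.ProxyHandler({"http": "http://localhost:8077"})
    opener = urllib2.build_opener(proxy)
    f = opener.open("http://www.google.com/")  # this is where it stalls
    print f.read()[:200]
    f.close()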
If anybody has any ideas about why this happens, or any coding mistakes I
may have made, I would appreciate the feedback.
Thanks,
Carl Waldbieser
--------- Python code below ------------------------------------
#############################################################
# Simple proxy
#############################################################
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urlparse import urlparse
import urllib2
import re
def multiple_replace(adict, text):
    """Replace multiple patterns in a single pass."""
    regex = re.compile("|".join(map(re.escape, adict.keys())))
    return regex.sub(lambda match: adict[match.group(0)], text)

def htmlencode(s):
    """Replace HTML special characters (&,<,>,',") with entities."""
    entities = {"&": "&amp;", "<": "&lt;", ">": "&gt;",
                "'": "&#39;", '"': "&quot;"}
    return multiple_replace(entities, s)
class WebProxyRequestHandler(BaseHTTPRequestHandler):
    """A subclass of BaseHTTPRequestHandler that acts as a web proxy
    server and can be chained with other web proxies.
    """
    def do_GET(self):
        """Handles an HTTP GET."""
        print "do_GET()"
        self.get_post_impl()

    def do_POST(self):
        """Handles an HTTP POST; reads the request body first."""
        print "do_POST()"
        length = self.headers["Content-Length"]
        d = self.rfile.read(int(length))
        self.get_post_impl(d)

    def get_post_impl(self, data=None):
        """Logs the request, then forwards it, optionally through
        another proxy."""
        print "client host: %s\nclient port: %d" % self.client_address
        print "command: %s" % self.command
        print "path: %s" % self.path
        print "request_version: %s" % self.request_version
        print "\n-- headers --"
        for header in self.headers.keys():
            print "%s: %s" % (header, self.headers.getheaders(header))
        print "-- end headers --\n"
        # Forward the request.
        server = self.server
        if server.proxy_addr:
            (scheme, netloc, path, parameters, query, fragment) = urlparse(self.path)
            print ("scheme: %s\nnetloc: %s\npath: %s\nparameters: %s\n"
                   "query: %s\nfragment: %s"
                   % (scheme, netloc, path, parameters, query, fragment))
            url = "%s:%d" % server.proxy_addr
            print "Proxy URL: %s" % url
            self.retrieve_request(data, {"http": url})
        else:
            self.retrieve_request(data)
    def retrieve_request(self, data, proxies=None):
        """Re-issues the request with urllib2 and relays the response."""
        request = urllib2.Request(self.path)
        # Copy the client's headers, except Host and User-Agent, which
        # urllib2 supplies itself.
        for header in self.headers.keys():
            if header.lower() not in ("host", "user-agent"):
                values = self.headers.getheaders(header)
                request.add_header(header, " ".join(values))
        for proxy_type in (proxies or {}):
            print "setting proxy: (%s, %s)" % (proxies[proxy_type], proxy_type)
            request.set_proxy(proxies[proxy_type], proxy_type)
        if data is not None:
            request.add_data(data)
        print "Attempting to open %s ..." % self.path
        try:
            f = urllib2.urlopen(request)
        except urllib2.HTTPError, e:
            self.send_response(e.code)
            self.end_headers()
        except Exception, e:
            print "Exception: %s" % str(e)
            self.write_error(e)
        else:
            print "Successfully opened %s" % self.path
            self.send_response(200)  # OK
            print "-- Response Info --"
            info = f.info()
            for item in info.keys():
                print "%s: %s" % (item, info[item])
                self.send_header(item, info[item])
            print "-- end Response Info --"
            self.end_headers()
            print "Reading..."
            s = f.read()
            print "Read successful."
            f.close()
            print "Writing..."
            self.wfile.write(s)
            print "Write successful."
            #self.wfile.close()
    def write_error(self, error):
        """Sends a small HTML page describing the error to the client."""
        # The error page is deliberately served as an ordinary 200 response.
        self.send_response(200)
        self.end_headers()
        self.wfile.write("""<html>
<head>
<title>Error</title>
</head>
<body>
An error occurred connecting to the address given.
<br/>
%s
</body>
</html>""" % htmlencode(str(error)))
        self.wfile.close()
class WebProxy(HTTPServer):
    def __init__(self, server_addr, proxy_addr=None):
        HTTPServer.__init__(self, server_addr, WebProxyRequestHandler)
        self.proxy_addr = proxy_addr
if __name__ == "__main__":
    import sys

    def usage():
        print "Usage: %s [port [proxy_addr proxy_port]]" % sys.argv[0]

    if len(sys.argv) >= 2:
        try:
            port = int(sys.argv[1])
        except ValueError:
            print "Port error."
            usage()
            sys.exit(1)
    else:
        port = 8077
    proxy_addr = ""
    if len(sys.argv) >= 4:
        proxy_addr = sys.argv[2]
        try:
            proxy_port = int(sys.argv[3])
        except ValueError:
            print "Proxy port error."
            usage()
            sys.exit(1)
    if proxy_addr:
        proxy = WebProxy(("localhost", port), (proxy_addr, proxy_port))
        print "Listening on %s:%d\nForwarding to %s:%d" % (
            "localhost", port, proxy_addr, proxy_port)
    else:
        proxy = WebProxy(("localhost", port))
        print "Listening on %s:%d" % ("localhost", port)
    proxy.serve_forever()
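P.S. One thing I was wondering about while re-reading retrieve_request():
it forwards nearly all of the browser's headers verbatim, including
hop-by-hop headers like Connection, Proxy-Connection, and Keep-Alive,
which RFC 2616 (section 13.5.1) says a proxy should strip before
forwarding. I have not tested whether that is related to the stalls, but
the header-copying loop could filter them out along these lines (an
untested sketch of a drop-in replacement for the loop at the top of
retrieve_request):

    # Untested idea: skip hop-by-hop headers and Proxy-Connection
    # instead of forwarding them to the origin server.
    hop_by_hop = ("connection", "keep-alive", "proxy-authenticate",
                  "proxy-authorization", "proxy-connection", "te",
                  "trailers", "transfer-encoding", "upgrade")
    for header in self.headers.keys():
        if header.lower() in hop_by_hop + ("host", "user-agent"):
            continue
        request.add_header(header, " ".join(self.headers.getheaders(header)))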