Bart Nessux
This is almost working. I've read up on queues and threads and learned a lot,
though not yet enough to fully understand what I'm doing; I'm getting there.
Much of this script was copied straight from this example:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/284631
My goal is to scan a big private network (65,000 hosts) for HTTP servers:
make a list of the IPs that are running servers and a list of those that are
not. I need to use threads to speed up the process; I can do it sequentially,
but it takes two days!
Everything in the script works except that it just hangs and never produces
the two result lists. Could a threading guru please offer some
advice?
Thanks... Bart
import urllib
import socket
import time
import Queue
import threading

######################
#  Network Section   #
######################
urls = []
x = 0
while x < 255:
    x = x + 1
    urls.append('http://192.168.1.' + str(x))

######################
#   Queue Section    #
######################
url_queue = Queue.Queue(65536)
for url in urls:
    url_queue.put(url)

######################
#   Thread Section   #
######################
def test_http(url_queue, result_queue):
    def sub_thread_proc(url, result):
        try:
            data = urllib.urlopen(url).read()
        except Exception:
            result.append(-1)
        else:
            result.append(1)
    while 1:
        try:
            url = url_queue.get()
            size = url_queue.qsize()
            print size
        except Queue.Empty:
            return
            print "Finished"
        result = []
        sub_thread = threading.Thread(target=sub_thread_proc,
                                      args=(url, result))
        sub_thread.setDaemon(True)
        sub_thread.start()
        sub_thread.join(HTTP_TIMEOUT)
        if [] == result:
            result_queue.put((url, "TIMEOUT"))
        elif -1 == result[0]:
            result_queue.put((url, "FAILED"))
        else:
            result_queue.put((url, result[0]))

HTTP_TIMEOUT = 20
workers = []
result_queue = Queue.Queue()
for thread_num in range(0, 64):
    workers.append(threading.Thread(target=test_http,
                                    args=(url_queue, result_queue)))
    workers[-1].start()

for w in workers:
    w.join()

web_servers = []
failures = []
while not result_queue.empty():
    url, result = result_queue.get(0)
    if isinstance(result, str):
        failures.append((result, url))
    else:
        web_servers.append((result, url))

web_servers.sort()
failures.sort()
for result, url in web_servers:
    print "%7d %s" % (result, url)
for result, url in failures:
    print "%7s %s" % (result, url)

#############
#    END    #
#############
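
[Editorial note, not part of the original post] The hang is consistent with how
Queue.get() behaves: called with no arguments it blocks forever, so Queue.Empty
is never raised, the worker threads never return, and the final w.join() calls
wait indefinitely. Below is a minimal sketch of a worker loop that drains the
queue with a non-blocking get; it is an illustrative rewrite, not the original
code, and the 20-second default socket timeout is an assumption (urllib.urlopen
in Python 2 takes no timeout argument, which is why the original uses the
sub-thread join trick instead).

import Queue
import socket
import threading
import urllib

socket.setdefaulttimeout(20)    # assumed timeout for the sockets urllib opens

def test_http(url_queue, result_queue):
    while 1:
        try:
            # get_nowait() (same as get(False)) raises Queue.Empty once the
            # queue is drained, so the worker can actually exit.
            url = url_queue.get_nowait()
        except Queue.Empty:
            return
        try:
            urllib.urlopen(url).read()
        except Exception:
            result_queue.put((url, "FAILED"))
        else:
            result_queue.put((url, 1))

With the workers exiting on Queue.Empty, the main thread's join() calls can
complete and the two result lists get printed.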