R
Robert.R.Emmel
Hello,
I am using the threading module and the Queue module in Python to
send out shipment tracking URL requests.
Is there a way to timeout a thread within a Queue?
I think the way I have it now the thread will wait until something is
returned and will basically wait forever for that something.
Between the waiting for something to be returned and the timeouts on
the database connections, the program appears to just hang like it has
stopped working. Any suggestions?
Here is a sample of code I am using (left out some of the code for
brevity):
import urllib
from HTMLParser import HTMLParser
import threading
import Queue
NTHREADS = 100
....
def Web_Retrieve(inpque, outqueue):
# Worker thread body: takes DB rows from `inpque`, fetches a carrier
# tracking page for each, and puts a result tuple on `outqueue`.
# NOTE(review): indentation and several line breaks were lost when this
# code was pasted, and sections are elided ("....") -- not runnable as shown.
N = 1
# Each worker opens its own MySQL connection.
try:
connection = MySQLdb.connect(host="hostname", port=3306,
user="username", passwd="password", db="Mydatabase")
print "t" + str(N)+ " - Database Connection Established !!"
cursor = connection.cursor()
except MySQLdb.OperationalError, message:
# Connect failure: the message is stored but never printed or logged,
# and the worker ends without producing any results -- the consumer in
# WebProcess will then block forever waiting for them.
errorMessage = "t" + str(N)+ " -Error %d:\n%s" % (message[0],
message[1])
else:
# Shutdown protocol: a row whose first field is '' is the sentinel.
# inpque.get() blocks forever if no sentinel arrives; this unbounded
# wait is the hang the poster describes -- Queue.get(timeout=...)
# would bound it.
DBRow = inpque.get()
while not (DBRow[0] == ''):
PNum = DBRow[1]
# RemoveHyphens/RemoveSpaces are defined in elided code; presumably
# they normalize the PRO number -- TODO confirm.
PNum = RemoveHyphens(RemoveSpaces(PNum))
print "t" + str(N)+ " -PNum : " + PNum
....
# Setup URL to retrieve status web page depending on
Carrier SCAC
# SCAC is set in elided code above; "RDWY" is the Roadway carrier code.
if SCAC == "RDWY":
pURL = "http://www.quiktrak.roadway.com/cgi-
bin/quiktrak?"
bURL = "&pro0=" + PNum
if DEBUG > 90 or (DEBUG > 30 and DEBUG < 40):
print "t" + str(N)+ " -URL: ", pURL +
bURL
WURL = pURL + bURL
# Retrieve status web page, assign it to a
variable and close connection
# NOTE(review): Python 2 urllib.urlopen takes no timeout argument,
# so this fetch can also block indefinitely.
try:
f = urllib.urlopen(pURL+bURL)
s = f.read()
f.close()
except IOError, e:
print 't' + str(N)+ ' -I/O Error:
',e.strerror
## continue
....
# PR and s12 come from elided code. Exactly one tuple per input row;
# WebProcess counts on that when draining the results queue.
outqueue.put((PR, s, WURL, s12))
DBRow = inpque.get()
N = N+ 1
# NOTE(review): the consumer reads exactly len(DBRows) tuples, so this
# trailing None appears never to be consumed -- verify it is needed.
outqueue.put(None)
cursor.close()
connection.commit()
connection.close()
print "t" + str(N)+ " -Database Closed"
## Main processing file.
def WebProcess(Mode):
# Main driver: starts NTHREADS Web_Retrieve workers, feeds them the rows
# returned by the bol_table query, and prints each result tuple.
# NOTE(review): indentation was lost when this code was pasted and
# sections are elided ("....") -- not runnable as shown.
## Get file listing from the proper directory depending on mode
if Mode == 'B':
IFilenames = os.listdir(os.curdir+os.sep+'Input')
NTHREADS = 100
....
# Open connection to MySql
try:
connection = MySQLdb.connect(host="hostname", port=3306,
user="username", passwd="password", db="mydatabase")
print "Database Connection Established !!"
cursor = connection.cursor()
except MySQLdb.OperationalError, message:
# Failure is stored but never printed or logged; function just ends.
errorMessage = "Error %d:\n%s" % (message[0], message[1])
else:
# maxsize 0 means both queues are unbounded, so put() never blocks.
inputs = Queue.Queue(0)
results = Queue.Queue(0)
thread_pool = []
for ii in range(NTHREADS):
thread = threading.Thread(target=Web_Retrieve,
args=(inputs, results))
thread.start()
thread_pool.append(thread)
## Retrieve BOL's from Database for web retrieval
print "Current Mode is : ", Mode
print "Length of Mode string is : ", len(Mode)
Mode_results = cursor.execute( "Select * from bol_table where
Mode = %s and (Expired IS NULL or Expired IS False);", (Mode.strip()))
print "Mode Query Results: ", Mode_results
if Mode_results > 0:
print " Do Web Page lookups !!! "
DBRows = cursor.fetchall()
for DBRow in DBRows:
inputs.put(DBRow)
# Expects exactly one result tuple per input row. results.get()
# blocks forever if any worker dies before producing its tuple
# (e.g. its DB-connect except path) -- a likely cause of the
# reported hang; results.get(timeout=...) would surface it instead.
for ii in range(len(DBRows)):
PR, s, WURL, s12 = results.get()
## print "PR is : "+PR+" S's first 100 char's are
"+s[0:100]+" WURL is : "+WURL
print
"+-------------------------------------------------------------+"
print "PR is : " + PR
print "s is (First 100) : " + s[0:100]
print "WURL is : " + WURL
print "s12 is : " + s12
print
"+-------------------------------------------------------------+"
.....
## Clear out thread pool
# One ('','') sentinel per thread ends each worker's loop; the threads
# are never join()ed, so their shutdown is not awaited.
for thread in thread_pool:
inputs.put(('',''))
.....
For the times I have run into the "<scr" + "ipt>" on web pages I take
the f.read() string s and pass it to this function:
## fix webpages that have the 'scr' + 'ipt' split on them.
def fixscript(x):
    """Collapse the obfuscated sequence 'scr" + "ipt' back to 'script'.

    Some pages split the word "script" as <scr" + "ipt> to defeat naive
    parsers; this removes every '" + "' infix so HTMLParser sees normal tags.

    Bug fixes vs. the original:
    - The first search started at index 12 (SQ + 12 with SQ == 0), so any
      occurrence within the first 12 characters was never found.
    - `if SQ <= 0: break` treated a legitimate match at index 0 as "not
      found".
    - Resuming 12 characters past a match could skip an adjacent occurrence
      after the string shrank by 5 characters.
    """
    pattern = 'scr" + "ipt'
    start = 0
    while True:
        pos = x.find(pattern, start)
        if pos < 0:  # -1 means no further occurrence
            break
        # Keep 'scr' (3 chars), drop '" + "' (5 chars), keep 'ipt'.
        x = x[:pos + 3] + x[pos + 8:]
        # Resume just past the repaired 'script' (6 chars).
        start = pos + 6
    return x
Just passing this along to those who have run into this problem when
parsing a web page.
I am using the threading module and the Queue module in Python to
send out shipment tracking URL requests.
Is there a way to timeout a thread within a Queue?
I think the way I have it now the thread will wait until something is
returned and will basically wait forever for that something.
Between the waiting for something to be returned and the timeouts on
the database connections, the program appears to just hang like it has
stopped working. Any suggestions?
Here is a sample of code I am using (left out some of the code for
brevity):
import urllib
from HTMLParser import HTMLParser
import threading
import Queue
NTHREADS = 100
....
def Web_Retrieve(inpque, outqueue):
# Worker thread body: takes DB rows from `inpque`, fetches a carrier
# tracking page for each, and puts a result tuple on `outqueue`.
# NOTE(review): indentation and several line breaks were lost when this
# code was pasted, and sections are elided ("....") -- not runnable as shown.
N = 1
# Each worker opens its own MySQL connection.
try:
connection = MySQLdb.connect(host="hostname", port=3306,
user="username", passwd="password", db="Mydatabase")
print "t" + str(N)+ " - Database Connection Established !!"
cursor = connection.cursor()
except MySQLdb.OperationalError, message:
# Connect failure: the message is stored but never printed or logged,
# and the worker ends without producing any results -- the consumer in
# WebProcess will then block forever waiting for them.
errorMessage = "t" + str(N)+ " -Error %d:\n%s" % (message[0],
message[1])
else:
# Shutdown protocol: a row whose first field is '' is the sentinel.
# inpque.get() blocks forever if no sentinel arrives; this unbounded
# wait is the hang the poster describes -- Queue.get(timeout=...)
# would bound it.
DBRow = inpque.get()
while not (DBRow[0] == ''):
PNum = DBRow[1]
# RemoveHyphens/RemoveSpaces are defined in elided code; presumably
# they normalize the PRO number -- TODO confirm.
PNum = RemoveHyphens(RemoveSpaces(PNum))
print "t" + str(N)+ " -PNum : " + PNum
....
# Setup URL to retrieve status web page depending on
Carrier SCAC
# SCAC is set in elided code above; "RDWY" is the Roadway carrier code.
if SCAC == "RDWY":
pURL = "http://www.quiktrak.roadway.com/cgi-
bin/quiktrak?"
bURL = "&pro0=" + PNum
if DEBUG > 90 or (DEBUG > 30 and DEBUG < 40):
print "t" + str(N)+ " -URL: ", pURL +
bURL
WURL = pURL + bURL
# Retrieve status web page, assign it to a
variable and close connection
# NOTE(review): Python 2 urllib.urlopen takes no timeout argument,
# so this fetch can also block indefinitely.
try:
f = urllib.urlopen(pURL+bURL)
s = f.read()
f.close()
except IOError, e:
print 't' + str(N)+ ' -I/O Error:
',e.strerror
## continue
....
# PR and s12 come from elided code. Exactly one tuple per input row;
# WebProcess counts on that when draining the results queue.
outqueue.put((PR, s, WURL, s12))
DBRow = inpque.get()
N = N+ 1
# NOTE(review): the consumer reads exactly len(DBRows) tuples, so this
# trailing None appears never to be consumed -- verify it is needed.
outqueue.put(None)
cursor.close()
connection.commit()
connection.close()
print "t" + str(N)+ " -Database Closed"
## Main processing file.
def WebProcess(Mode):
# Main driver: starts NTHREADS Web_Retrieve workers, feeds them the rows
# returned by the bol_table query, and prints each result tuple.
# NOTE(review): indentation was lost when this code was pasted and
# sections are elided ("....") -- not runnable as shown.
## Get file listing from the proper directory depending on mode
if Mode == 'B':
IFilenames = os.listdir(os.curdir+os.sep+'Input')
NTHREADS = 100
....
# Open connection to MySql
try:
connection = MySQLdb.connect(host="hostname", port=3306,
user="username", passwd="password", db="mydatabase")
print "Database Connection Established !!"
cursor = connection.cursor()
except MySQLdb.OperationalError, message:
# Failure is stored but never printed or logged; function just ends.
errorMessage = "Error %d:\n%s" % (message[0], message[1])
else:
# maxsize 0 means both queues are unbounded, so put() never blocks.
inputs = Queue.Queue(0)
results = Queue.Queue(0)
thread_pool = []
for ii in range(NTHREADS):
thread = threading.Thread(target=Web_Retrieve,
args=(inputs, results))
thread.start()
thread_pool.append(thread)
## Retrieve BOL's from Database for web retrieval
print "Current Mode is : ", Mode
print "Length of Mode string is : ", len(Mode)
Mode_results = cursor.execute( "Select * from bol_table where
Mode = %s and (Expired IS NULL or Expired IS False);", (Mode.strip()))
print "Mode Query Results: ", Mode_results
if Mode_results > 0:
print " Do Web Page lookups !!! "
DBRows = cursor.fetchall()
for DBRow in DBRows:
inputs.put(DBRow)
# Expects exactly one result tuple per input row. results.get()
# blocks forever if any worker dies before producing its tuple
# (e.g. its DB-connect except path) -- a likely cause of the
# reported hang; results.get(timeout=...) would surface it instead.
for ii in range(len(DBRows)):
PR, s, WURL, s12 = results.get()
## print "PR is : "+PR+" S's first 100 char's are
"+s[0:100]+" WURL is : "+WURL
"+-------------------------------------------------------------+"
print "PR is : " + PR
print "s is (First 100) : " + s[0:100]
print "WURL is : " + WURL
print "s12 is : " + s12
"+-------------------------------------------------------------+"
.....
## Clear out thread pool
# One ('','') sentinel per thread ends each worker's loop; the threads
# are never join()ed, so their shutdown is not awaited.
for thread in thread_pool:
inputs.put(('',''))
.....
For the times I have run into the "<scr" + "ipt>" on web pages I take
the f.read() string s and pass it to this function:
## fix webpages that have the 'scr' + 'ipt' split on them.
def fixscript(x):
    """Collapse the obfuscated sequence 'scr" + "ipt' back to 'script'.

    Some pages split the word "script" as <scr" + "ipt> to defeat naive
    parsers; this removes every '" + "' infix so HTMLParser sees normal tags.

    Bug fixes vs. the original:
    - The first search started at index 12 (SQ + 12 with SQ == 0), so any
      occurrence within the first 12 characters was never found.
    - `if SQ <= 0: break` treated a legitimate match at index 0 as "not
      found".
    - Resuming 12 characters past a match could skip an adjacent occurrence
      after the string shrank by 5 characters.
    """
    pattern = 'scr" + "ipt'
    start = 0
    while True:
        pos = x.find(pattern, start)
        if pos < 0:  # -1 means no further occurrence
            break
        # Keep 'scr' (3 chars), drop '" + "' (5 chars), keep 'ipt'.
        x = x[:pos + 3] + x[pos + 8:]
        # Resume just past the repaired 'script' (6 chars).
        start = pos + 6
    return x
Just passing this along to those who have run into this problem when
parsing a web page.