J
john.lehmann
Attached is a piece of code which first hits the login page
successfully and receives back login cookies. But then when I attempt
to hit a page which is restricted to logged in users only, I fail.
That seems to be because I am not successfully re-attaching the cookies
to the header portion of this request. I have tried 2 methods
which should both work I think. The first was to use install_opener to
attach the cookie handler back to urlopen. The second method was to
use the cookiehandler method add_cookie_header. But in both cases,
before sending out the 2nd request, it seems to have empty headers --
which indicates to me that the necessary cookies have not been
attached.
I also tried messing with the policy quite a bit, thinking that might
be causing the cookies not to be returned. First I used the default,
then set some flags on the default, then even overrode methods on the
default to make it as lenient as possible. This had no apparent
effect.
Thanks a lot!
Below I have pasted the most relevant code section, as well as my full
code file. Apologies for all the comments, but I wanted to show what I
had tried.
-----------------
RELEVANT CODE (snipped from full code)
# NOW GO TO PAGE RESTRICTED TO LOGGED IN PEOPLE
the_url = "http://www.dpreview.com/forums/login.asp?jump=editprofile.asp"
req = urllib2.Request(the_url)
# Calling cj.add_cookie_header(req) by hand is not needed: the installed
# HTTPCookieProcessor does it when the request is opened.
# THIS OPEN FAILS - I GET - "NEED TO LOGIN" PAGE
handle = urllib2.urlopen(req)
the_page = handle.read()
# Inspect the headers only AFTER opening, and via header_items():
# cookielib stores the Cookie header as an "unredirected" header, so it
# never shows up in req.headers, and nothing is attached before the open.
print("headers: %s" % (req.header_items(),))
-----------------
FULL CODE
#!/usr/bin/python
import urllib
import urllib2
import re
import os
from cookielib import *
class MyCookiePolicy(DefaultCookiePolicy):
    """Cookie policy that accepts and returns every cookie.

    Debugging aid: every accept/return hook is overridden to answer True,
    so the policy can be ruled out as the reason cookies are not sent back.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) keeps this working
        # with the Python 2 cookielib classes the script imports.
        DefaultCookiePolicy.__init__(
            self,
            rfc2965=True,
            hide_cookie2=False,
            strict_ns_domain=DefaultCookiePolicy.DomainLiberal)

    def set_ok(self, cookie, request):
        """Accept every cookie offered by the server."""
        return True

    def return_ok(self, cookie, request):
        """Allow every stored cookie to be sent back."""
        return True

    def domain_return_ok(self, cookie, request):
        """Skip the domain pre-check; always answer True."""
        return True

    def path_return_ok(self, cookie, request):
        """Skip the path pre-check; always answer True."""
        return True
# Script body: log in, show the cookies collected, then request a page that
# is restricted to logged-in users and report what came back.

# INITIAL REQUEST WITH USER INFO
the_url = 'http://www.dpreview.com/forums/login_post.asp'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'email': '****',
    'password': '****',
    # "remember": "checked",  # <- create permanent cookie
    # (also tried "remember": "remember")
    'jump': "/forums/",
}
headers = {'User-Agent': user_agent}
data = urllib.urlencode(values)
req = urllib2.Request(the_url, data, headers)

# COOKIE POLICY
# Other configurations that were tried, with no apparent effect:
#   DefaultCookiePolicy(rfc2965=True, hide_cookie2=False,
#                       strict_ns_domain=DefaultCookiePolicy.DomainLiberal)
#   MyCookiePolicy()
policy = DefaultCookiePolicy(rfc2965=True, hide_cookie2=False)

# CREATE COOKIE JAR WITH POLICY
cj = MozillaCookieJar()
cj.set_policy(policy)

# CREATE OPENER, AND OPEN PAGE
# install_opener() makes plain urllib2.urlopen() go through the cookie-aware
# opener, so opener.open(req) and urllib2.urlopen(req) are interchangeable.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
handle = urllib2.urlopen(req)
the_page = handle.read()

# SHOW COOKIES COLLECTED - LOOKS GOOD HERE
for c in cj:
    print("COOKIE: %s" % (c,))
print("URL: %s" % (handle.geturl(),))
print("INFO: %s" % (handle.info(),))

# DEMONSTRATE WE'RE LOGGED IN
for line in the_page.split('\n'):
    line = line.strip()
    if "Welcome to the" in line:
        print("MESSAGE: %s" % (line,))

# NOW GO TO PAGE RESTRICTED TO LOGGED IN PEOPLE
the_url = "http://www.dpreview.com/forums/login.asp?jump=editprofile.asp"
# Send the same User-Agent as the login request so both requests present
# themselves as the same client.
req = urllib2.Request(the_url, None, headers)
# Calling cj.add_cookie_header(req) by hand is not needed: the installed
# HTTPCookieProcessor does it when the request is opened.
handle = urllib2.urlopen(req)
the_page = handle.read()

# Inspect the headers only AFTER opening, and via header_items():
# cookielib stores the Cookie header as an "unredirected" header, so it
# never shows up in req.headers, and nothing is attached before the open.
print("headers: %s" % (req.header_items(),))

# THIS ALSO PROVES LOGIN-STATE WAS LOST
for line in the_page.split('\n'):
    line = line.strip()
    if "To access" in line:
        print("MESSAGE: %s" % (line,))
print("URL: %s" % (handle.geturl(),))
print("INFO: %s" % (handle.info(),))
successfully and receives back login cookies. But then when I attempt
to hit a page which is restricted to logged in users only, I fail.
That seems to be because I am not successfully re-attaching the cookies
to the header portion of this request. I have tried 2 methods
which should both work I think. The first was to use install_opener to
attach the cookie handler back to urlopen. The second method was to
use the cookiehandler method add_cookie_header. But in both cases,
before sending out the 2nd request, it seems to have empty headers --
which indicates to me that the necessary cookies have not been
attached.
I also tried messing with the policy quite a bit, thinking that might
be causing the cookies not to be returned. First I used the default,
then set some flags on the default, then even overrode methods on the
default to make it as lenient as possible. This had no apparent
effect.
Thanks a lot!
Below I have pasted the most relevant code section, as well as my full
code file. Apologies for all the comments, but I wanted to show what I
had tried.
-----------------
RELEVANT CODE (snipped from full code)
# NOW GO TO PAGE RESTRICTED TO LOGGED IN PEOPLE
the_url = "http://www.dpreview.com/forums/login.asp?jump=editprofile.asp"
req = urllib2.Request(the_url)
# Calling cj.add_cookie_header(req) by hand is not needed: the installed
# HTTPCookieProcessor does it when the request is opened.
# THIS OPEN FAILS - I GET - "NEED TO LOGIN" PAGE
handle = urllib2.urlopen(req)
the_page = handle.read()
# Inspect the headers only AFTER opening, and via header_items():
# cookielib stores the Cookie header as an "unredirected" header, so it
# never shows up in req.headers, and nothing is attached before the open.
print("headers: %s" % (req.header_items(),))
-----------------
FULL CODE
#!/usr/bin/python
import urllib
import urllib2
import re
import os
from cookielib import *
class MyCookiePolicy(DefaultCookiePolicy):
    """Cookie policy that accepts and returns every cookie.

    Debugging aid: every accept/return hook is overridden to answer True,
    so the policy can be ruled out as the reason cookies are not sent back.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) keeps this working
        # with the Python 2 cookielib classes the script imports.
        DefaultCookiePolicy.__init__(
            self,
            rfc2965=True,
            hide_cookie2=False,
            strict_ns_domain=DefaultCookiePolicy.DomainLiberal)

    def set_ok(self, cookie, request):
        """Accept every cookie offered by the server."""
        return True

    def return_ok(self, cookie, request):
        """Allow every stored cookie to be sent back."""
        return True

    def domain_return_ok(self, cookie, request):
        """Skip the domain pre-check; always answer True."""
        return True

    def path_return_ok(self, cookie, request):
        """Skip the path pre-check; always answer True."""
        return True
# Script body: log in, show the cookies collected, then request a page that
# is restricted to logged-in users and report what came back.

# INITIAL REQUEST WITH USER INFO
the_url = 'http://www.dpreview.com/forums/login_post.asp'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'email': '****',
    'password': '****',
    # "remember": "checked",  # <- create permanent cookie
    # (also tried "remember": "remember")
    'jump': "/forums/",
}
headers = {'User-Agent': user_agent}
data = urllib.urlencode(values)
req = urllib2.Request(the_url, data, headers)

# COOKIE POLICY
# Other configurations that were tried, with no apparent effect:
#   DefaultCookiePolicy(rfc2965=True, hide_cookie2=False,
#                       strict_ns_domain=DefaultCookiePolicy.DomainLiberal)
#   MyCookiePolicy()
policy = DefaultCookiePolicy(rfc2965=True, hide_cookie2=False)

# CREATE COOKIE JAR WITH POLICY
cj = MozillaCookieJar()
cj.set_policy(policy)

# CREATE OPENER, AND OPEN PAGE
# install_opener() makes plain urllib2.urlopen() go through the cookie-aware
# opener, so opener.open(req) and urllib2.urlopen(req) are interchangeable.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
handle = urllib2.urlopen(req)
the_page = handle.read()

# SHOW COOKIES COLLECTED - LOOKS GOOD HERE
for c in cj:
    print("COOKIE: %s" % (c,))
print("URL: %s" % (handle.geturl(),))
print("INFO: %s" % (handle.info(),))

# DEMONSTRATE WE'RE LOGGED IN
for line in the_page.split('\n'):
    line = line.strip()
    if "Welcome to the" in line:
        print("MESSAGE: %s" % (line,))

# NOW GO TO PAGE RESTRICTED TO LOGGED IN PEOPLE
the_url = "http://www.dpreview.com/forums/login.asp?jump=editprofile.asp"
# Send the same User-Agent as the login request so both requests present
# themselves as the same client.
req = urllib2.Request(the_url, None, headers)
# Calling cj.add_cookie_header(req) by hand is not needed: the installed
# HTTPCookieProcessor does it when the request is opened.
handle = urllib2.urlopen(req)
the_page = handle.read()

# Inspect the headers only AFTER opening, and via header_items():
# cookielib stores the Cookie header as an "unredirected" header, so it
# never shows up in req.headers, and nothing is attached before the open.
print("headers: %s" % (req.header_items(),))

# THIS ALSO PROVES LOGIN-STATE WAS LOST
for line in the_page.split('\n'):
    line = line.strip()
    if "To access" in line:
        print("MESSAGE: %s" % (line,))
print("URL: %s" % (handle.geturl(),))
print("INFO: %s" % (handle.info(),))