G
George
Hello everyone, I know many have helped, but I cannot get this to work
out correctly. I cannot use BeautifulSoup at all. I need to
parse the HTML and extract all the links, convert them to IP
addresses, and build a list of all these IP addresses; then I need to
sort the list and remove the duplicates so that unit testing will work.
Please help. I have never done Python before and I can't seem to get the
hang of it.
"""
Module to print IP addresses of tags in web file containing HTML
['0.0.0.0', '128.255.44.134', '128.255.45.54']
['0.0.0.0', '128.255.135.49', '128.255.244.57', '128.255.30.11',
'128.255.34.132', '128.255.44.51', '128.255.45.53',
'128.255.45.54', '129.255.241.42', '64.202.167.129']
"""
import htmllib
import formatter
import urllib
import socket
from urlparse import urlparse
class HTML_Parser(htmllib.HTMLParser):
def __init__(self):
htmllib.HTMLParser.__init__(self,
formatter.AbstractFormatter(formatter.NullWriter()))
def start_a(self, args):
for key, value in args:
if key.lower() == 'href':
global listURL
def showIPnums(URL):
parser = HTML_Parser()
connect = urllib.urlopen(URL)
data = connect.read()
parser.feed(data)
parser.close()
connect.close()
if __name__ == '__main__':
    # When executed as a script, run the doctests found in this module.
    import doctest
    import sys

    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
out correctly. I cannot use BeautifulSoup at all. I need to
parse the HTML and extract all the links, convert them to IP
addresses, and build a list of all these IP addresses; then I need to
sort the list and remove the duplicates so that unit testing will work.
Please help. I have never done Python before and I can't seem to get the
hang of it.
"""
Module to print IP addresses of tags in web file containing HTML
['0.0.0.0', '128.255.44.134', '128.255.45.54']
['0.0.0.0', '128.255.135.49', '128.255.244.57', '128.255.30.11',
'128.255.34.132', '128.255.44.51', '128.255.45.53',
'128.255.45.54', '129.255.241.42', '64.202.167.129']
"""
import htmllib
import formatter
import urllib
import socket
from urlparse import urlparse
class HTML_Parser(htmllib.HTMLParser):
def __init__(self):
htmllib.HTMLParser.__init__(self,
formatter.AbstractFormatter(formatter.NullWriter()))
def start_a(self, args):
for key, value in args:
if key.lower() == 'href':
global listURL
def showIPnums(URL):
parser = HTML_Parser()
connect = urllib.urlopen(URL)
data = connect.read()
parser.feed(data)
parser.close()
connect.close()
if __name__ == '__main__':
    # When executed as a script, run the doctests found in this module.
    import doctest
    import sys

    this_module = sys.modules[__name__]
    doctest.testmod(this_module)