Newbie, list has no attribute iteritems

R

rabad

Hi,
I've created a custom filter based on HTMLParser, with the following
source:

class Filter(HTMLParser):

def __init__(self, keyfile):
HTMLParser.__init__(self)
mykwfile = open(keyfile, 'r')
self._keywords = []
for kw in mykwfile.read().split('\n'):
self._keywords.append(kw)
print kw
mykwfile.close()
self._toProcess = False
self.stack = []

def handle_starttag(self, tag, attrs):
if 'a' != tag:
self.stack.append(self.__html_start_tag(tag, attrs))
return
attrs = dict(attrs)
self._toProcess = True
for key in self._keywords:
if 'a' == tag:
p = re.compile(key, re.IGNORECASE)
if 'href' in attrs:
attrs['href'] = p.sub(r'XXXXX',attrs['href'])
self.stack.append(self.__html_start_tag(tag, attrs))

def handle_startendtag(self, tag, attrs):
if 'img' != tag and 'meta' != tag:
self.stack.append(self.__html_startend_tag(tag, attrs))
return
attrs = dict(attrs)
self._toProcess = True
for key in self._keywords:
p = re.compile(key, re.IGNORECASE)
if 'img' == tag:
if 'src' in attrs:
attrs['src'] = p.sub(r'XXXXX',attrs['src'])
if 'alt' in attrs:
attrs['alt'] = p.sub(r'XXXXX',attrs['alt'])
if 'meta' == tag:
if 'description' in attrs:
attrs['description'] = p.sub(r'XXXXX',attrs['description'])
if 'content' in attrs:
attrs['content'] = p.sub(r'XXXXX',attrs['content'])
if 'meta' == tag or 'img' == tag:
self._toProcess = False
self.stack.append(self.__html_startend_tag(tag, attrs))

def handle_endtag(self, tag):
self.stack.append(self.__html_end_tag(tag))
if self._toProcess:
self._toProcess = False

def handle_data(self, data):
if self._toProcess:
for key in self._keywords:
p = re.compile(key,re.IGNORECASE)
data = p.sub(r'XXXXX',data)
self.stack.append(data)

def __html_start_tag(self, tag, attrs):
return '<%s%s>' % (tag, self.__html_attrs(attrs))

def __html_startend_tag(self, tag, attrs):
return '<%s%s/>' % (tag, self.__html_attrs(attrs))

def __html_end_tag(self, tag):
return '</%s>' % (tag)

def __html_attrs(self, attrs):
_attrs = ''
if attrs:
_attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in attrs.iteritems()]))
return _attrs

But when I use it, it gives me the following error message:
ERROR Processor exception: AttributeError: 'list' object has no attribute 'iteritems'
Traceback (most recent call last):
File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 87, in Process
p.feed(document.GetValue("data"))
File "HTMLParser.py", line 108, in feed
File "HTMLParser.py", line 148, in goahead
File "HTMLParser.py", line 281, in parse_starttag
File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 121, in handle_starttag
self.stack.append(self.__html_start_tag(tag, attrs))
File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 167, in __html_start_tag
return '<%s%s>' % (tag, self.__html_attrs(attrs))
File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 178, in __html_attrs
_attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in attrs.iteritems()]))

Anybody knows why it says attrs is not a list element?
Thanks,
Rubén
 
J

Justin Ezequiel

def handle_starttag(self, tag, attrs): # <-- attrs here is a list
    if 'a' != tag:
        self.stack.append(self.__html_start_tag(tag, attrs)) # <-- attrs here is still a list
        return
    attrs = dict(attrs) # <-- now attrs is a dictionary
 
B

Bruno Desthuilliers

rabad a écrit :
Hi,
I've created a custom filter based on HTMLParser, with the following
source:

(snip)
But when I use it, it gives me the following error message:
ERROR Processor exception: AttributeError: 'list' object has no attribute 'iteritems' (snip)
File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 178, in __html_attrs
_attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in attrs.iteritems()]))

Anybody knows why it says attrs is not a list element?

Actually, what the traceback says is that
1/ attrs is a list object
2/ list objects have no attribute named iteritems

If you assumed it was a dict, then it's probably time to re-read
HTMLParser's doc. Otherwise, if you assumed list had an iteritems method, then
it's probably time to re-read the Python tutorial !-)

IIRC, HTMLParser represents attributes as a list of (attrname, value)
pairs. If so (please check it out), your method should be rewritten as

return ' %s' % (' '.join(('%s="%s"') % attr for attr in attrs))


As a side note: __double_leading_underscores is probably a bit extreme.
The convention for implementation attributes is _single_leading_underscore.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,755
Messages
2,569,536
Members
45,015
Latest member
AmbrosePal

Latest Threads

Top