Newbie, list has no attribute iteritems

Discussion in 'Python' started by rabad, Jul 4, 2008.

  1. rabad

    rabad Guest

    Hi,
    I've created a custom filter based on HTMLParser, with the following
    source:

    class Filter(HTMLParser):

    def __init__(self, keyfile):
    HTMLParser.__init__(self)
    mykwfile = open(keyfile, 'r')
    self._keywords = []
    for kw in mykwfile.read().split('\n'):
    self._keywords.append(kw)
    print kw
    mykwfile.close()
    self._toProcess = False
    self.stack = []

    def handle_starttag(self, tag, attrs):
    if 'a' != tag:
    self.stack.append(self.__html_start_tag(tag, attrs))
    return
    attrs = dict(attrs)
    self._toProcess = True
    for key in self._keywords:
    if 'a' == tag:
    p = re.compile(key, re.IGNORECASE)
    if 'href' in attrs:
    attrs['href'] = p.sub(r'XXXXX',attrs['href'])
    self.stack.append(self.__html_start_tag(tag, attrs))

    def handle_startendtag(self, tag, attrs):
    if 'img' != tag and 'meta' != tag:
    self.stack.append(self.__html_startend_tag(tag, attrs))
    return
    attrs = dict(attrs)
    self._toProcess = True
    for key in self._keywords:
    p = re.compile(key, re.IGNORECASE)
    if 'img' == tag:
    if 'src' in attrs:
    attrs['src'] = p.sub(r'XXXXX',attrs['src'])
    if 'alt' in attrs:
    attrs['alt'] = p.sub(r'XXXXX',attrs['alt'])
    if 'meta' == tag:
    if 'description' in attrs:
    attrs['description'] =
    p.sub(r'XXXXX',attrs['description'])
    if 'content' in attrs:
    attrs['content'] =
    p.sub(r'XXXXX',attrs['content'])
    if 'meta' == tag or 'img' == tag:
    self._toProcess = False
    self.stack.append(self.__html_startend_tag(tag, attrs))

    def handle_endtag(self, tag):
    self.stack.append(self.__html_end_tag(tag))
    if self._toProcess:
    self._toProcess = False

    def handle_data(self, data):
    if self._toProcess:
    for key in self._keywords:
    p = re.compile(key,re.IGNORECASE)
    data = p.sub(r'XXXXX',data)
    self.stack.append(data)

    def __html_start_tag(self, tag, attrs):
    return '<%s%s>' % (tag, self.__html_attrs(attrs))

    def __html_startend_tag(self, tag, attrs):
    return '<%s%s/>' % (tag, self.__html_attrs(attrs))

    def __html_end_tag(self, tag):
    return '</%s>' % (tag)

    def __html_attrs(self, attrs):
    _attrs = ''
    if attrs:
    _attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in
    attrs.iteritems()]))
    return _attrs

    But when I use it, it gives me the following error message:
    ERROR Processor exception: AttributeError: 'list' object has no attribute 'iteritems'
    Traceback (most recent call last):
      File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 87, in Process
        p.feed(document.GetValue("data"))
      File "HTMLParser.py", line 108, in feed
      File "HTMLParser.py", line 148, in goahead
      File "HTMLParser.py", line 281, in parse_starttag
      File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 121, in handle_starttag
        self.stack.append(self.__html_start_tag(tag, attrs))
      File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 167, in __html_start_tag
        return '<%s%s>' % (tag, self.__html_attrs(attrs))
      File "d:\esp\lib\python2.3\processors\DocDumpF.py", line 178, in __html_attrs
        _attrs = ' %s' % (' '.join([('%s="%s"' % (k,v)) for k,v in attrs.iteritems()]))

    Does anybody know why it says attrs is not a list element?
    Thanks,
    Rubén
     
    rabad, Jul 4, 2008
    #1
    1. Advertisements

  2. def handle_starttag(self, tag, attrs): # <-- attrs here is a list
    if 'a' != tag:
    self.stack.append(self.__html_start_tag(tag, attrs)) # <-- attrs here is still a list
    return
    attrs = dict(attrs) # <-- now attrs is a dictionary
     
    Justin Ezequiel, Jul 4, 2008
    #2
    1. Advertisements

  3. rabad a écrit :
    Actually, what the traceback says is that
    1/ attrs is a list object
    2/ list objects have no attribute named iteritems

    If you assumed it was a dict, then it's probably time to re-read
    HTMLParser's documentation. Or, if you assumed lists had an iteritems
    method, then it's probably time to re-read the Python tutorial !-)

    IIRC, HTMLParser represents attributes as a list of (attrname, value)
    pairs. If so (please check it out), your method should be rewritten as

    return ' %s' % (' '.join(('%s="%s"') % attr for attr in attrs))


    As a side note: __double_leading_underscores is probably a bit extreme.
    The convention for implementation attributes is _single_leading_underscore.
     
    Bruno Desthuilliers, Jul 4, 2008
    #3
    1. Advertisements

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments (here). After that, you can post your question and our members will help you out.
Similar Threads
Loading...