Re: Best way to make a list unique?

Discussion in 'Python' started by Michael Spencer, Mar 9, 2005.

  1. Delaney, Timothy C (Timothy) wrote:
    > Michael Hoffman wrote:
    >
    >
    >>>>For those who don't know, these implement a hash set/map which
    >>>>iterates in the order that the keys were first added to the set/map.

    >>
    >>I would love to see such a thing.

    >
    >
    > I've proposed this on python-dev, but the general feeling so far is
    > against it. So far the only use case is to remove duplicates without
    > changing order, and there are iterator-based solutions which would
    > normally be preferable.
    >
    > It's pretty simple to roll your own, and I'll probably put together a
    > Cookbook recipe for it.
    >
    > Tim Delaney


    Here's something to work with:

    class OrdSet(object):
    def __init__(self, iterable):
    """Build an ordered, unique collection of hashable items"""
    self._data = {None:[None, None]} # None is the pointer to the first
    # element. This is unsatisfactory
    # because it cannot then be a member
    # of the collection
    self._last = None
    self.update(iterable)

    def add(self, obj):
    """Add an element to the collection"""
    data = self._data
    if not obj in data:
    last = self._last
    data[last][1] = obj
    data[obj] = [last, None]
    self._last = obj

    def update(self, iterable):
    """Update the collection with the union of itself and another"""
    obj = self._last
    data = self._data
    last = data[obj][0]
    for item in iterable:
    if item not in data:
    data[obj] = [last, item]
    last, obj = obj, item
    data[obj] = [last, None]
    self._last = obj

    def remove(self, item):
    """Remove an element from a set; it must be a member.

    If the element is not a member, raise a KeyError."""
    data = self._data
    prev, next = data[item]
    data[prev][1] = next
    data[next][0] = prev

    def discard(self, item):
    """Remove an element from a set if it is a member.
    If the element is not a member, do nothing."""
    try:
    self.remove(item)
    except KeyError:
    pass

    def __contains__(self, item):
    return item in self._data

    def pop(self):
    """Remove and the return the oldest element"""
    data = self._data
    prev, first = data[None]
    data[None] = [None,data[first][1]]
    return first

    def clear(self):
    self.__init__([])

    def __iter__(self):
    """Iterate over the collection in order"""
    data = self._data
    prev, next = data[None]
    while next is not None:
    yield next
    prev, next = data[next]

    def __len__(self):
    return len(self._data)-1

    def __repr__(self):
    return "%s(%s)" % (self.__class__.__name__,list(self))

    >>> a= OrdSet(range(10))
    >>> a

    OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> a.update(range(5,15))
    >>> a

    OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    >>> a.discard(8)
    >>> a

    OrdSet([0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14])
    >>>



    Michael
    Michael Spencer, Mar 9, 2005
    #1
    1. Advertising

  2. Michael Spencer <> wrote:

    Nice. When you replace None by an object(), you have no restriction on
    the elements any more:

    > Here's something to work with:
    >
    > class OrdSet(object):
    > def __init__(self, iterable):
    > """Build an ordered, unique collection of hashable items"""
    > #self._data = {None:[None, None]} # None is the pointer to the first
    > # # element. This is unsatisfactory
    > # # because it cannot then be a
    > # # member of the collection
    > #self._last = None

    self._last = self._root = root = object()
    self._data = {root:[root, root]}
    > self.update(iterable)
    >
    > def add(self, obj):
    > """Add an element to the collection"""
    > data = self._data
    > if not obj in data:
    > last = self._last
    > data[last][1] = obj
    > #data[obj] = [last, None]

    data[obj] = [last, self._root]
    > self._last = obj
    >
    > def update(self, iterable):
    > """Update the collection with the union of itself and another"""
    > obj = self._last
    > data = self._data
    > last = data[obj][0]
    > for item in iterable:
    > if item not in data:
    > data[obj] = [last, item]
    > last, obj = obj, item
    > #data[obj] = [last, None]

    data[obj] = [last, self._root]
    > self._last = obj
    >
    > def remove(self, item):
    > """Remove an element from a set; it must be a member.
    >
    > If the element is not a member, raise a KeyError."""
    > data = self._data
    > prev, next = data[item]
    > data[prev][1] = next
    > data[next][0] = prev
    >
    > def discard(self, item):
    > """Remove an element from a set if it is a member.
    > If the element is not a member, do nothing."""
    > try:
    > self.remove(item)
    > except KeyError:
    > pass
    >
    > def __contains__(self, item):
    > return item in self._data
    >
    > def pop(self):
    > """Remove and the return the oldest element"""
    > data = self._data
    > #prev, first = data[None]
    > #data[None] = [None,data[first][1]]

    root = self._root
    prev, first = data[root]
    data[root] = [root,data[first][1]]
    > return first
    >
    > def clear(self):
    > self.__init__([])
    >
    > def __iter__(self):
    > """Iterate over the collection in order"""
    > data = self._data
    > #prev, next = data[None]
    > #while next is not None:

    root = self._root
    prev, next = data[root]
    while next is not root:
    > yield next
    > prev, next = data[next]
    >
    > def __len__(self):
    > return len(self._data)-1
    >
    > def __repr__(self):
    > return "%s(%s)" % (self.__class__.__name__,list(self))


    >>> a=OrdSet([None,1,None,3])
    >>> a

    OrdSet([None, 1, 3])

    Marc
    Marc Christiansen, Mar 9, 2005
    #2
    1. Advertising

  3. Marc Christiansen wrote:
    > Michael Spencer <> wrote:
    >
    > Nice. When you replace None by an object(), you have no restriction on
    > the elements any more:
    >
    >

    Thanks for the suggestion, Marc.

    Note that if there is no need to access the middle of the collection, then the
    implementation is simpler, and less resource-intensive, since the items can be
    singly-linked

    class UniqueQueue(object):
    def __init__(self, iterable):
    self._data = _data = {}
    self._last = self._root = object() # An object the user is unlikely to
    # reference - thanks Marc
    self.update(iterable)

    def push(self, obj):
    if not obj in self._data:
    self._data[self._last] = obj
    self._last = obj

    def pop(self):
    data = self._data
    first = data.pop(self._root)
    self._root = first
    return first

    def update(self, iterable):
    last = self._last
    data = self._data
    for item in iterable:
    if item not in data:
    data[last] = item
    last = item
    self._last = last

    def __iter__(self):
    data = self._data
    next = self._root
    try:
    while 1:
    next = data[next]
    yield next
    except KeyError:
    raise StopIteration

    def __repr__(self):
    return "%s(%s)" % (self.__class__.__name__,list(self))


    >>> q = UniqueQueue(range(5))
    >>> q.update(range(3,8))
    >>> q

    UniqueQueue([0, 1, 2, 3, 4, 5, 6, 7])
    >>> q.pop()

    0
    >>> q

    UniqueQueue([1, 2, 3, 4, 5, 6, 7])
    >>>
    >>> q.push(None)
    >>> q

    UniqueQueue([1, 2, 3, 4, 5, 6, 7, None])
    >>>



    Michael
    Michael Spencer, Mar 9, 2005
    #3
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Delaney, Timothy C (Timothy)

    RE: Best way to make a list unique?

    Delaney, Timothy C (Timothy), Mar 8, 2005, in forum: Python
    Replies:
    3
    Views:
    309
    Kent Johnson
    Mar 9, 2005
  2. Delaney, Timothy C (Timothy)

    RE: Best way to make a list unique?

    Delaney, Timothy C (Timothy), Mar 9, 2005, in forum: Python
    Replies:
    2
    Views:
    335
    Paul Rubin
    Mar 12, 2005
  3. ToshiBoy
    Replies:
    6
    Views:
    828
    ToshiBoy
    Aug 12, 2008
  4. laredotornado
    Replies:
    35
    Views:
    2,374
    Tom Anderson
    Mar 10, 2010
  5. Token Type
    Replies:
    9
    Views:
    339
    Chris Angelico
    Sep 9, 2012
Loading...

Share This Page