Ahhhh.... Right. Hmmmmm.... <thinks hard> You're correct, you do
need to set the value to the key. I think using a dict is better than
using intern(). Here's an optimization:
node2 = node_cache.setdefault(node2, node2)
I would sure be more comfortable with a change in nomenclature, e.g.,
node2name = node_name_cache.setdefault(node2name, node2name)
so that it is clear that you are dealing with name strings, not typical graph
vertex/node objects.
BTW, reading from a file, you can probably not in general avoid forward references,
(i.e., names of adjacent nodes yet to be defined), but they should be resolvable at the end,
so you could make nodes that refer to adjacent nodes directly instead of by name.
Nodes would carry a ref to their own (string) name, and go by that same name string in a graph dict,
so there should be no duplication of strings, just references to them. E.g.,
(I started to just sketch something, and ... well, it's hard to stop programming Python
before something is running at least preliminarily ;-) I subclassed dict to keep nodes in by name.
====< graph.py >=========================================================
def boxit(s):
lines = s.splitlines()
wid = max(map(len,lines))
ret = ['+-%s-+' % ('-'*wid)]
fmt = '| %%-%ds |'%wid
for line in lines: ret.append(fmt%line)
ret.append(ret[0])
return '\n'.join(ret)
def errep(Ex, *rest):
if __debug__: print boxit('%s: %s'%(Ex.__name__, rest and rest[0])) # and blunder on
else: raise Ex(*rest)
class Node(object):
__slots__ = 'name', 'adj_list'
def __init__(self, name):
self.name = name
self.adj_list = None # signify undefined, vs [] for no adjacent nodes
class Graph(dict):
def load(self, infile, print_lines=False):
for line in infile:
if print_lines: print line.rstrip()
sline = line.strip()
if not sline or sline.startswith('#'): continue
node_def_names = line.split() # assume line like 'nodename adj_node1_name adj_node2_name ...'
node_name = node_def_names[0]
if node_name in self:
# don't allow re-definition
errep(ValueError, 'Node %r being re-defined:\n new adj: %r\n old adj: %r' %(
node_name, node_def_names[1:], self[node_name].adj_list))
else:
self[node_name] = Node(node_name)
# set adj list, making making this a defined node
self[node_name].adj_list = node_def_names[1:]
# change adj lists to direct references to adjacent nodes
for node in self.values():
adj_list = node.adj_list
assert adj_list is not None
for i in xrange(len(adj_list)):
adj_name = adj_list
try:
adj_list = self[adj_name]
except KeyError:
errep( ValueError, 'node %r refers to undefined adj node %r' % (node.name, adj_name))
# continuing if debugging
adj_list = Node(adj_name) # adj_list None (vs []) signifies undefined
def show(self):
node_names = self.keys()
node_names.sort()
visited = {}
ret = []
def prtree(node, indent=0):
if node.name in visited:
if indent: ret.append('%s%s (see previous)'%(indent*' ',node.name))
else:
visited[node.name]=True
if node.adj_list is None:
ret.append('%s%s (undefined) '%(indent*' ',node.name))
return
ret.append('%s%s'%(indent*' ',node.name))
for adj in node.adj_list:
prtree(adj, indent+1)
for node_name in node_names:
prtree(self[node_name])
ret.append('')
return '\n'.join(ret)
def test(filepath=''):
graph = Graph() # name-string -> Node instance
if not filepath:
import StringIO
f_graph = StringIO.StringIO("""
A B C D
B E F
C G H
G A
D B
E
F
H
D E F
X E Y
""")
else:
f_graph = file(filepath)
print 'Loading graph from %s ...\n--' % (filepath or '(test)')
graph.load(f_graph, True) # print the lines read
print '--'
f_graph.close()
print graph.show()
if __name__ == '__main__':
import sys
args = sys.argv[1:]
test(args and args[0] or '')
=========================================================================
Result:
[16:33] C:\pywk\clp\pp>graph.py
Loading graph from (test) ...
--
A B C D
B E F
C G H
G A
D B
E
F
H
D E F
+----------------------------------------+
| ValueError: Node 'D' being re-defined: |
| new adj: ['E', 'F'] |
| old adj: ['B'] |
+----------------------------------------+
X E Y
+-------------------------------------------------------+
| ValueError: node 'X' refers to undefined adj node 'Y' |
+-------------------------------------------------------+
--
A
B
E
F
C
G
A (see previous)
H
D
B (see previous)
X
E (see previous)
Y (undefined)
(Not tested beyond what you see. NTBWYS? ;-)
Regards,
Bengt Richter