John Carter
Ooh this is so, so sweet....
If you know anything about lexical analyzers and parsers, read this
carefully and realize I have done something very very very Good
here...
# Here is a lexical analyzer that breaks a
# Graphviz "Dot" file into tokens...
# digraph a_name {
# a -> b;
# c -> d -> e;
# f -> {a;b;c}
# d;
# }
#
# Yields ["digraph","a_name","{","a","->","b",";",...]
# Lexical analyzer for Graphviz "Dot"-style input: scans the string from
# left to right and yields each token (keyword/identifier, number, "->",
# "{", "}", ";") to the caller's block. Whitespace runs are matched so the
# scan can advance past them, but are never yielded.
def tokenize(string)
  token_pattern = %r{ ^ (\s+ | [a-zA-Z_][a-zA-Z0-9_]* | ([-+][0-9\.]+|[0-9\.]+) | -> | \{ | \} | \; ) }x
  remainder = string
  while (scan = token_pattern.match(remainder))
    lexeme = scan[0]
    # Skip whitespace; hand every real token to the block.
    yield lexeme unless lexeme =~ %r{ ^\s }x
    remainder = scan.post_match
  end
end
# Now here is the trick...
# If we map tokens to characters
# then parsing becomes a matter of ....
# Matching a regular Expression again!!
# Parses a Graphviz "digraph" token stream (produced by tokenize) with the
# regex trick: every token is mapped onto exactly ONE character, so parsing
# reduces to matching the encoded string against a grammar regexp, and the
# match offsets line up one-to-one with indices into the token array.
#
# Returns [graph_name, edge_string, edge_tokens] where
#   graph_name  - the identifier following "digraph"
#   edge_string - the one-char-per-token encoding of the edge statements
#   edge_tokens - the slice of raw tokens covered by edge_string
# Raises RuntimeError when the input does not match the grammar.
def parse(string)
  # Build up a list of tokens in an array
  token_list = []
  # One character per token, kept in lock-step with token_list
  token_string = ''
  tokenize(string) do |token|
    token_list << token
    if token == 'digraph'
      token_string << 'd'
    elsif token =~ %r{ [\{\};] }x
      token_string << token   # braces and semicolons encode as themselves
    elsif token == '->'
      token_string << '>'
    else
      # BUG FIX: this branch was "elsif" with no condition, which made the
      # append expression itself the condition and left the branch body
      # empty -- it only worked because evaluating the condition had the
      # desired side effect. "else" states the intent directly.
      token_string << 'i'     # identifiers and numbers all encode as 'i'
    end
  end
  # Our example encodes to "di{i>i;i>i>i;i>{i;i;i}i;}"
  regexp = %r{ ^ d i \{ ((i ; | i (> i)+ ; | i > \{ i (; i)* \})*) \} ;? $ }x
  match_data = regexp.match(token_string)
  raise "Parse error '#{token_string}' doesn't match '#{regexp.source}'" unless match_data
  # Because the encoding is one char per token, group 1's offsets index
  # directly into token_list.
  return [token_list[1], match_data[1], token_list[match_data.begin(1)...match_data.end(1)]]
end
# Drive the parser over the input. NOTE(review): `io` is not defined in
# this excerpt -- presumably the Dot source text read elsewhere; confirm.
name, edge_string, edge_list = parse(io)
# Now edge_string is i>i;i>i>i;i>{i;i;i}i;
# name is "a_name"
# edge_list is ["a","->","b",";","c",...]
John Carter Phone : (64)(3) 358 6639
Tait Electronics Fax : (64)(3) 359 4632
PO Box 1645 Christchurch Email : (e-mail address removed)
New Zealand
A Million Monkeys can inflict worse things than just Shakespeare on
your system.
If you know anything about lexical analyzers and parsers, read this
carefully and realize I have done something very very very Good
here...
# Here is a lexical analyzer that breaks a
# Graphviz "Dot" file into tokens...
# digraph a_name {
# a -> b;
# c -> d -> e;
# f -> {a;b;c}
# d;
# }
#
# Yields ["digraph","a_name","{","a","->","b",";",...]
# Lexical analyzer for Graphviz "Dot"-style input: scans the string from
# left to right and yields each token (keyword/identifier, number, "->",
# "{", "}", ";") to the caller's block. Whitespace runs are matched so the
# scan can advance past them, but are never yielded.
def tokenize(string)
  token_pattern = %r{ ^ (\s+ | [a-zA-Z_][a-zA-Z0-9_]* | ([-+][0-9\.]+|[0-9\.]+) | -> | \{ | \} | \; ) }x
  remainder = string
  while (scan = token_pattern.match(remainder))
    lexeme = scan[0]
    # Skip whitespace; hand every real token to the block.
    yield lexeme unless lexeme =~ %r{ ^\s }x
    remainder = scan.post_match
  end
end
# Now here is the trick...
# If we map tokens to characters
# then parsing becomes a matter of ....
# Matching a regular Expression again!!
# Parses a Graphviz "digraph" token stream (produced by tokenize) with the
# regex trick: every token is mapped onto exactly ONE character, so parsing
# reduces to matching the encoded string against a grammar regexp, and the
# match offsets line up one-to-one with indices into the token array.
#
# Returns [graph_name, edge_string, edge_tokens] where
#   graph_name  - the identifier following "digraph"
#   edge_string - the one-char-per-token encoding of the edge statements
#   edge_tokens - the slice of raw tokens covered by edge_string
# Raises RuntimeError when the input does not match the grammar.
def parse(string)
  # Build up a list of tokens in an array
  token_list = []
  # One character per token, kept in lock-step with token_list
  token_string = ''
  tokenize(string) do |token|
    token_list << token
    if token == 'digraph'
      token_string << 'd'
    elsif token =~ %r{ [\{\};] }x
      token_string << token   # braces and semicolons encode as themselves
    elsif token == '->'
      token_string << '>'
    else
      # BUG FIX: this branch was "elsif" with no condition, which made the
      # append expression itself the condition and left the branch body
      # empty -- it only worked because evaluating the condition had the
      # desired side effect. "else" states the intent directly.
      token_string << 'i'     # identifiers and numbers all encode as 'i'
    end
  end
  # Our example encodes to "di{i>i;i>i>i;i>{i;i;i}i;}"
  regexp = %r{ ^ d i \{ ((i ; | i (> i)+ ; | i > \{ i (; i)* \})*) \} ;? $ }x
  match_data = regexp.match(token_string)
  raise "Parse error '#{token_string}' doesn't match '#{regexp.source}'" unless match_data
  # Because the encoding is one char per token, group 1's offsets index
  # directly into token_list.
  return [token_list[1], match_data[1], token_list[match_data.begin(1)...match_data.end(1)]]
end
# Drive the parser over the input. NOTE(review): `io` is not defined in
# this excerpt -- presumably the Dot source text read elsewhere; confirm.
name, edge_string, edge_list = parse(io)
# Now edge_string is i>i;i>i>i;i>{i;i;i}i;
# name is "a_name"
# edge_list is ["a","->","b",";","c",...]
John Carter Phone : (64)(3) 358 6639
Tait Electronics Fax : (64)(3) 359 4632
PO Box 1645 Christchurch Email : (e-mail address removed)
New Zealand
A Million Monkeys can inflict worse things than just Shakespeare on
your system.