N
Neil Cerutti
The following simple language parser and interpreter is from
chapter 2 of the textbook: _Programming Languages: Application
and Interpretation_, by Shriram Krishnamurthi.
http://www.cs.brown.edu/~sk/Publications/Books/ProgLangs/
First, I'll show the Scheme version of the program, which uses
the cool define-type and type-case macros (I'd never heard of
them before reading this book, which is too bad). I think it is
beautiful.
; Our grammar:
; <WAE> ::=
; <num>
; | { + <WAE> <WAE> }
; | { - <WAE> <WAE> }
; | {with {<id> <WAE>} <WAE>}
; | <id>
(define-type WAE
[num (n number?)]
[add (lhs WAE?)
(rhs WAE?)]
[sub (lhs WAE?)
(rhs WAE?)]
[id (name symbol?)]
[with (name symbol?) (named-expr WAE?) (named-body WAE?)])
;; parse : sexp --> WAE
;; To convert s-expressions into WAEs
(define (parse sexp)
(cond
[(number? sexp) (num sexp)]
[(symbol? sexp) (id sexp)]
[(list? sexp)
(case (first sexp)
[(+) (if (= (length sexp) 3)
(add (parse (second sexp))
(parse (third sexp)))
(error 'parse "add: expected 2 args"))]
[(-) (if (= (length sexp) 3)
(sub (parse (second sexp))
(parse (third sexp)))
(error 'parse "sub: expected 2 args"))]
[(with) (with (first (second sexp))
(parse (second (second sexp)))
(parse (third sexp)))]
[else (error 'parse "invalid expression")])]
[else (error 'parse "invalid expression")]))
;; subst : WAE symbol WAE --> WAE
;; substitutes second argument with third argument in first argument,
;; as per the rules of substitution; the resulting expression contains
;; no free instances of the second argument.
(define (subst expr sub-id val)
(type-case WAE expr
[num (n) expr]
[add (lhs rhs) (add (subst lhs sub-id val)
(subst rhs sub-id val))]
[sub (lhs rhs) (sub (subst lhs sub-id val)
(subst rhs sub-id val))]
[with (bound-id named-expr bound-body)
(if (symbol=? bound-id sub-id)
(with bound-id
(subst named-expr sub-id val)
bound-body)
(with bound-id
(subst named-expr sub-id val)
(subst bound-body sub-id val)))]
[id (v) (if (symbol=? v sub-id) val expr)]))
;; calc : WAE --> number
;; consumes a WAE and computes the corresponding number
(define (calc an-ae)
(type-case WAE an-ae
[num (n) n]
[add (lhs rhs) (+ (calc lhs) (calc rhs))]
[sub (lhs rhs) (- (calc lhs) (calc rhs))]
[with (bound-id named-expr bound-body)
(calc (subst bound-body
bound-id
(num (calc named-expr))))]
[id (v) (error 'calc "free identifier")]))
(test (calc (parse '3)) 3)
(test (calc (parse '{+ 3 4})) 7)
(test (calc (parse '{+ {- 3 4} 7})) 6)
; etc... the rest of the tests omited
The following is a translation of the above program into Python,
as best as I could. I have not included the simple s-expression
parser that I had to write as a front-end, but I'll post if if
anyone's interested. The type-case is replaced with a class
hierarchy, resulting in some extra syntax. Most of the docstrings
and tests have been omited as irrelevant.
import sexp
import operator
class Wae(object):
""" An abstract base class for representing the abstract syntax tree of a
Wae expression.
"""
def calc(self):
raise NotImplementedError
class Num(Wae):
""" This subclass represents numbers.
"""
def __init__(self, number):
self.number = number
def __repr__(self):
return '<Num %d>' % self.number
def subst_(self, sub_id, value):
return self
def calc(self):
return self.number
class Id(Wae):
""" This subclass represents an identifier.
"""
def __init__(self, name):
self.name = name
def __repr__(self):
return '<Id \'%s\'>' % self.name
def subst_(self, sub_id, value):
if self.name == sub_id:
return value
return self
def calc(self):
raise SyntaxError('Free identifier '+self.name)
class BinOp(Wae):
""" This abstract class represents binary operations.
"""
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def __repr__(self):
return '<%s %r %r>' % (self.__class__.__name__, self.lhs, self.rhs)
def subst_(self, sub_id, value):
return self.__class__(self.lhs.subst_(sub_id, value),
self.rhs.subst_(sub_id, value))
def calc(self):
return self.op(self.lhs.calc(), self.rhs.calc())
class Add(BinOp):
""" This subclass represents an addition expression.
"""
def __init__(self, lhs, rhs):
super(Add, self).__init__(lhs, rhs)
self.op = operator.add
class Sub(BinOp):
""" This subclass represents a substraction expression.
"""
def __init__(self, lhs, rhs):
super(Sub, self).__init__(lhs, rhs)
self.op = operator.sub
class With(Wae):
""" This subclass represents the Wae binding expression, 'with'.
"""
def __init__(self, bound_id, named_expr, bound_body):
self.bound_id = bound_id
self.named_expr = named_expr
self.bound_body = bound_body
def __repr__(self):
return '<With \'%s\' %r %r>' % (self.bound_id,
self.named_expr, self.bound_body)
def calc(self):
return self.bound_body.subst_(
self.bound_id, Num(self.named_expr.calc())).calc()
def subst_(self, sub_id, value):
if sub_id == self.bound_id:
return With(self.bound_id,
self.named_expr.subst_(sub_id, value),
self.bound_body)
else:
return With(self.bound_id,
self.named_expr.subst_(sub_id, value),
self.bound_body.subst_(sub_id, value))
# parse : sexp --> Wae
# Wae grammar:
# Wae ::= Num
# Wae ::= ( + <Wae> <Wae> )
# Wae ::= ( - <Wae> <Wae> )
# Wae ::= ( with ( Id <Wae> ) <Wae> )
# Wae ::= Id
def parse(expr):
""" Compile a string representing an WAE expression into an object that
represents the abstract syntax tree of that expression. The AST provides a
calc method that returns the result of evaluating the expression.
Traceback (most recent call last):
...
SyntaxError: Ill-formed expression
Traceback (most recent call last):
...
SyntaxError: Ill-formed expression
3
7
20
Traceback (most recent call last):
...
SyntaxError: Free identifier x
"""
def parse_ast(ast):
if isinstance(ast, int):
return Num(ast)
elif isinstance(ast, str):
return Id(ast)
elif isinstance(ast, tuple):
op = ast[0]
if op in ['+', '-'] and len(ast) == 3:
if op == '+':
return Add(parse_ast(ast[1]), parse_ast(ast[2]))
elif op == '-':
return Sub(parse_ast(ast[1]), parse_ast(ast[2]))
elif op == 'with' and len(ast) == 3 and len(ast[1]) == 2:
return With(ast[1][0], parse_ast(ast[1][1]), parse_ast(ast[2]))
raise SyntaxError("Ill-formed expression")
return parse_ast(sexp.read(expr))
The sexp parser I wrote returns a tuple that represents the parse
tree of an s-expression, and recognizes only s-expressions,
strings and integers.
How can I make the Python more idiomatic Python?
How can I make it more "beautiful"? A type hierarchy seems
over-engineered in comparison to Scheme's type-case, but I
liked a cascade of isinstance calls (as in parse) even less.
The type hierarchy did allow me to factor out the code
duplication in the (sub ...) and (add ...) types of Scheme, and
seems like a nice benefit over type-case.
chapter 2 of the textbook: _Programming Languages: Application
and Interpretation_, by Shriram Krishnamurthi.
http://www.cs.brown.edu/~sk/Publications/Books/ProgLangs/
First, I'll show the Scheme version of the program, which uses
the cool define-type and type-case macros (I'd never heard of
them before reading this book, which is too bad). I think it is
beautiful.
; Our grammar:
; <WAE> ::=
; <num>
; | { + <WAE> <WAE> }
; | { - <WAE> <WAE> }
; | {with {<id> <WAE>} <WAE>}
; | <id>
(define-type WAE
[num (n number?)]
[add (lhs WAE?)
(rhs WAE?)]
[sub (lhs WAE?)
(rhs WAE?)]
[id (name symbol?)]
[with (name symbol?) (named-expr WAE?) (named-body WAE?)])
;; parse : sexp --> WAE
;; To convert s-expressions into WAEs
(define (parse sexp)
(cond
[(number? sexp) (num sexp)]
[(symbol? sexp) (id sexp)]
[(list? sexp)
(case (first sexp)
[(+) (if (= (length sexp) 3)
(add (parse (second sexp))
(parse (third sexp)))
(error 'parse "add: expected 2 args"))]
[(-) (if (= (length sexp) 3)
(sub (parse (second sexp))
(parse (third sexp)))
(error 'parse "sub: expected 2 args"))]
[(with) (with (first (second sexp))
(parse (second (second sexp)))
(parse (third sexp)))]
[else (error 'parse "invalid expression")])]
[else (error 'parse "invalid expression")]))
;; subst : WAE symbol WAE --> WAE
;; substitutes second argument with third argument in first argument,
;; as per the rules of substitution; the resulting expression contains
;; no free instances of the second argument.
(define (subst expr sub-id val)
(type-case WAE expr
[num (n) expr]
[add (lhs rhs) (add (subst lhs sub-id val)
(subst rhs sub-id val))]
[sub (lhs rhs) (sub (subst lhs sub-id val)
(subst rhs sub-id val))]
[with (bound-id named-expr bound-body)
(if (symbol=? bound-id sub-id)
(with bound-id
(subst named-expr sub-id val)
bound-body)
(with bound-id
(subst named-expr sub-id val)
(subst bound-body sub-id val)))]
[id (v) (if (symbol=? v sub-id) val expr)]))
;; calc : WAE --> number
;; consumes a WAE and computes the corresponding number
(define (calc an-ae)
(type-case WAE an-ae
[num (n) n]
[add (lhs rhs) (+ (calc lhs) (calc rhs))]
[sub (lhs rhs) (- (calc lhs) (calc rhs))]
[with (bound-id named-expr bound-body)
(calc (subst bound-body
bound-id
(num (calc named-expr))))]
[id (v) (error 'calc "free identifier")]))
(test (calc (parse '3)) 3)
(test (calc (parse '{+ 3 4})) 7)
(test (calc (parse '{+ {- 3 4} 7})) 6)
; etc... the rest of the tests omited
The following is a translation of the above program into Python,
as best as I could. I have not included the simple s-expression
parser that I had to write as a front-end, but I'll post if if
anyone's interested. The type-case is replaced with a class
hierarchy, resulting in some extra syntax. Most of the docstrings
and tests have been omited as irrelevant.
import sexp
import operator
class Wae(object):
""" An abstract base class for representing the abstract syntax tree of a
Wae expression.
"""
def calc(self):
raise NotImplementedError
class Num(Wae):
""" This subclass represents numbers.
"""
def __init__(self, number):
self.number = number
def __repr__(self):
return '<Num %d>' % self.number
def subst_(self, sub_id, value):
return self
def calc(self):
return self.number
class Id(Wae):
""" This subclass represents an identifier.
"""
def __init__(self, name):
self.name = name
def __repr__(self):
return '<Id \'%s\'>' % self.name
def subst_(self, sub_id, value):
if self.name == sub_id:
return value
return self
def calc(self):
raise SyntaxError('Free identifier '+self.name)
class BinOp(Wae):
""" This abstract class represents binary operations.
"""
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def __repr__(self):
return '<%s %r %r>' % (self.__class__.__name__, self.lhs, self.rhs)
def subst_(self, sub_id, value):
return self.__class__(self.lhs.subst_(sub_id, value),
self.rhs.subst_(sub_id, value))
def calc(self):
return self.op(self.lhs.calc(), self.rhs.calc())
class Add(BinOp):
""" This subclass represents an addition expression.
"""
def __init__(self, lhs, rhs):
super(Add, self).__init__(lhs, rhs)
self.op = operator.add
class Sub(BinOp):
""" This subclass represents a substraction expression.
"""
def __init__(self, lhs, rhs):
super(Sub, self).__init__(lhs, rhs)
self.op = operator.sub
class With(Wae):
""" This subclass represents the Wae binding expression, 'with'.
"""
def __init__(self, bound_id, named_expr, bound_body):
self.bound_id = bound_id
self.named_expr = named_expr
self.bound_body = bound_body
def __repr__(self):
return '<With \'%s\' %r %r>' % (self.bound_id,
self.named_expr, self.bound_body)
def calc(self):
return self.bound_body.subst_(
self.bound_id, Num(self.named_expr.calc())).calc()
def subst_(self, sub_id, value):
if sub_id == self.bound_id:
return With(self.bound_id,
self.named_expr.subst_(sub_id, value),
self.bound_body)
else:
return With(self.bound_id,
self.named_expr.subst_(sub_id, value),
self.bound_body.subst_(sub_id, value))
# parse : sexp --> Wae
# Wae grammar:
# Wae ::= Num
# Wae ::= ( + <Wae> <Wae> )
# Wae ::= ( - <Wae> <Wae> )
# Wae ::= ( with ( Id <Wae> ) <Wae> )
# Wae ::= Id
def parse(expr):
""" Compile a string representing an WAE expression into an object that
represents the abstract syntax tree of that expression. The AST provides a
calc method that returns the result of evaluating the expression.
Traceback (most recent call last):
...
SyntaxError: Ill-formed expression
Traceback (most recent call last):
...
SyntaxError: Ill-formed expression
3
7
20
Traceback (most recent call last):
...
SyntaxError: Free identifier x
"""
def parse_ast(ast):
if isinstance(ast, int):
return Num(ast)
elif isinstance(ast, str):
return Id(ast)
elif isinstance(ast, tuple):
op = ast[0]
if op in ['+', '-'] and len(ast) == 3:
if op == '+':
return Add(parse_ast(ast[1]), parse_ast(ast[2]))
elif op == '-':
return Sub(parse_ast(ast[1]), parse_ast(ast[2]))
elif op == 'with' and len(ast) == 3 and len(ast[1]) == 2:
return With(ast[1][0], parse_ast(ast[1][1]), parse_ast(ast[2]))
raise SyntaxError("Ill-formed expression")
return parse_ast(sexp.read(expr))
The sexp parser I wrote returns a tuple that represents the parse
tree of an s-expression, and recognizes only s-expressions,
strings and integers.
How can I make the Python more idiomatic Python?
How can I make it more "beautiful"? A type hierarchy seems
over-engineered in comparison to Scheme's type-case, but I
liked a cascade of isinstance calls (as in parse) even less.
The type hierarchy did allow me to factor out the code
duplication in the (sub ...) and (add ...) types of Scheme, and
seems like a nice benefit over type-case.