Error checking using regex ?

G

Guy Robinson

I have the code below which parses an expression string and creates tokens.

Can anyone suggest the best of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy

code:

import re
import time

# Token patterns.  They are joined into one alternation below and tried in
# the order listed there, so earlier patterns win over later ones.
# Raw strings keep every backslash intact for the regex engine.
re_par = r'[\(\)]'                      # a single parenthesis
# Number with an optional exponent marker.  The exponent letter is a plain
# "E": the previous spelling "\E" is rejected as a bad escape by the re
# module of current Python versions.
re_num = r'[0-9]*\.?[0-9]+E?[0-9]*'
re_opr = r'[\*\/\+\-\^]'                # arithmetic operators
re_cns = r'PI'                          # named constant
re_trg = r'SIN|COS|TAN|ASIN|ACOS|ATAN|SGN'   # built-in function names
# Variable: name characters (whitespace included), at most one dot.
# Note that this pattern can also match the empty string.
re_var = r'[a-z_0-9\s]*\.?[a-z_0-9\s]*'

# One capturing group, so that recom.split() keeps the matched tokens.
recom = re.compile(
    '(?P<token>%s|%s|%s|%s|%s|%s)'
    % (re_par, re_num, re_opr, re_cns, re_trg, re_var),
    re.VERBOSE | re.IGNORECASE)

def test(str):
    """Split the expression *str* into tokens with ``recom`` and print them.

    Every piece returned by ``recom.split`` is stripped of surrounding
    whitespace and blank pieces are dropped; the remaining list is printed.
    If *str* is not something the regex can split (a ``TypeError``), the
    message 'error of some kind' is printed instead.  Nothing is returned.

    NOTE(review): the original indentation was lost; this assumes the list
    is printed once, after the loop - confirm.
    The parameter name shadows the builtin ``str``; it is kept so existing
    callers are unaffected.
    """
    try:
        pieces = recom.split(str)
    except TypeError:
        # Only a non-string argument can fail here; anything else (for
        # example KeyboardInterrupt) is no longer silently swallowed.
        print('error of some kind')
        return

    output = []
    for piece in pieces:
        token = piece.strip()
        if token:
            output.append(token)
    print(output)

class stopwatch:
    """Tiny wall-clock timer based on ``time.time()``.

    ``start()`` stores the current time in ``self.t``; ``stop()`` reports
    the seconds elapsed since then and resets ``self.t`` to 0.
    """

    def __init__(self):
        # No state is created here; ``self.t`` first appears in start().
        pass

    def start(self):
        """Remember the current time and return the text 'starting timer'."""
        self.t = time.time()
        return 'starting timer'

    def stop(self):
        """Return 'stopped at <elapsed> seconds' and reset ``self.t`` to 0."""
        elapsed = time.time() - self.t
        self.t = 0
        return 'stopped at %f seconds' % (elapsed,)

# Driver: time a run of the tokenizer over the sample expressions.
e = stopwatch()
print(e.start())

# Expressions that are expected to tokenize cleanly.
for expression in (
        "9",
        "9 + 3 + 6",
        "9 + 3 / 11",
        "( 9 + 3)",
        "(9+3) / 11",
        "9 - 12 - 6",
        "-9 - (12 - 6)",
        "2*3.14159",
        "3.1415926535*3.1415926535 / 10",
        "PI * PI / 10",
        "PI*PI/10",
        "PI^2",
        "6.02E23 * 8.048",
        "sin(PI/2)",
        "2^3^2",
        "2^9",
        "sgn(-2)",
        "sgn(0)",
        "sgn(0.1)",
        "ff*2",
        "ff*g g/2",
        "ff*2/dd.r r",
        "5*4+300/(5-2)*(6+4)+4",
        "((5*4+300)/(5-2))*(6+4)+4",
        "(320/3)*10+4",
):
    test(expression)

# now test error expressions
for expression in (
        "ff*2/dd.r..ss r",  # additional ..ss and whitespace -invalid variable
        "ff*$24..55/ddr",   # double .. -invalid number
        "ff*2/dd.r.ss r",   # variable with double . -invalid variable
        # "ff*((w.w+3)-2"   # no closing parentheses-to be tested when
        #                   # evaluating expression
):
    test(expression)

print(e.stop())
 
H

Heiko Wundram

Am Dienstag, 8. Juni 2004 13:26 schrieb Guy Robinson:
I have the code below which parses an expression string and creates tokens.

You cannot parse expressions using regular expressions, nor check them for
errors, because the languages that regular expressions can describe are not
"intelligent" enough to match braces (see any primer on complexity theory:
you need a machine with state, such as a deterministic stack machine, to
check for matching braces).

Your best bet to be able to check an expression, and also to be able to parse
it, is to write a context free grammar for your syntax, try to parse the
string you're evaluating, and in case parsing fails, to complain that the
expression is invalid. If you're parsing Python expressions, your best bet is
to call functions from the compile module (which create a code object from a
Python expression which is callable using exec).

HTH!

Heiko.
 
P

Paul McGuire

Guy Robinson said:
I have the code below which parses an expression string and creates tokens.

Can anyone suggest the best of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy
<snip>

Guy -

Well, I recognize the test cases from an example that I include with
pyparsing. Are you trying to add support for variables to that example? If
so, here is the example, modified to support assignments to variables.

-- Paul

============================
# minimath.py (formerly fourfn.py)
#
# Demonstration of the parsing module, implementing a simple 4-function
# expression parser, with support for scientific notation, and symbols for
# e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended to add variable assignment, storage, and evaluation, and
# Python-like comments.
#
# Copyright 2003,2004 by Paul McGuire
#
from pyparsing import (
    Literal, CaselessLiteral, Word, Combine, Group, Optional, ZeroOrMore,
    OneOrMore, Forward, nums, alphas, restOfLine, delimitedList,
)
import math

# Variable name -> value; filled in by assignVar().
variables = {}
# Postfix stack of operands, operators and names built by the parse actions.
exprStack = []

def pushFirst( str, loc, toks ):
    """Parse action: append the first matched token to ``exprStack``.

    *str* and *loc* are accepted but not used.  *toks* is returned as
    received; when it is empty nothing is pushed.
    """
    global exprStack
    if not toks:
        return toks
    first = toks[0]
    exprStack.append( first )
    return toks

def assignVar( str, loc, toks ):
    """Parse action for an assignment: store the value, then push the name.

    The expression currently on ``exprStack`` is evaluated (which empties
    it) and stored in ``variables`` under ``toks[0]``.  The name is then
    pushed via ``pushFirst`` so that evaluating the stack afterwards yields
    the assigned value.  Returns None.
    """
    global exprStack, variables
    value = evaluateStack( exprStack )
    variables[ toks[0] ] = value
    pushFirst( str, loc, toks )


bnf = None
def BNF():
    """Return the expression grammar, building it on the first call.

    The parser is cached in the module-level ``bnf`` and reused by later
    calls.  Parse actions push tokens onto ``exprStack`` in postfix order
    (operands first, then the operator or function name); ``assignVar``
    handles ``name = expr``.  Text from ``#`` to end of line is ignored.

    Precedence, tightest first: ``^`` (right-associative), ``*`` and ``/``,
    then ``+`` and ``-``.
    """
    global bnf
    if bnf is None:
        point = Literal( "." )
        e = CaselessLiteral( "E" )
        fnumber = Combine( Word( "+-"+nums, nums ) +
                           Optional( point + Optional( Word( nums ) ) ) +
                           Optional( e + Word( "+-"+nums, nums ) ) )
        ident = Word(alphas, alphas+nums+"_$")
        # dotted names such as obj.attribute, returned as one token
        varident = delimitedList(ident,".",combine=True)

        plus = Literal( "+" )
        minus = Literal( "-" )
        mult = Literal( "*" )
        div = Literal( "/" )
        lpar = Literal( "(" ).suppress()
        rpar = Literal( ")" ).suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal( "^" )
        pi = CaselessLiteral( "PI" )

        expr = Forward()
        factor = Forward()
        # NOTE(review): pi and e are tried before identifiers, so a name
        # starting with "pi" or "e" presumably matches the constant first -
        # confirm before relying on such names.
        atom = ( ( pi | e | fnumber | ident + lpar + expr + rpar |
                   varident ).setParseAction( pushFirst ) |
                 ( lpar + expr.suppress() + rpar ) )
        # The exponent operand is a factor, not a whole expr: with expr,
        # "2^3+1" was parsed as 2^(3+1).  Recursing on factor keeps "^"
        # right-associative, so "2^3^2" is still 2^(3^2).
        factor << atom + ZeroOrMore( ( expop + factor ).setParseAction(
            pushFirst ) )
        term = factor + ZeroOrMore( ( multop + factor ).setParseAction(
            pushFirst ) )
        expr << term + ZeroOrMore( ( addop + term ).setParseAction(
            pushFirst ) )
        assignment = (varident + "=" + expr).setParseAction( assignVar )

        bnf = Optional( assignment | expr )

        comment = "#" + restOfLine
        bnf.ignore(comment)

    return bnf

# map operator symbols to corresponding arithmetic operations
opn = { "+" : ( lambda a,b: a + b ),
        "-" : ( lambda a,b: a - b ),
        "*" : ( lambda a,b: a * b ),
        "/" : ( lambda a,b: a / b ),
        "^" : ( lambda a,b: a ** b ) }

# map function names to their one-argument implementations
fn = { "sin" : math.sin,
       "cos" : math.cos,
       "tan" : math.tan,
       "abs" : abs,
       "trunc" : ( lambda a: int(a) ),
       # floor(a + 0.5) rounds halves up for negative values as well;
       # int(a + 0.5) truncated toward zero and turned -0.6 into 0
       "round" : ( lambda a: int( math.floor( a + 0.5 ) ) ),
       # -1, 0 or 1 according to the sign of a
       "sgn" : ( lambda a: (a > 0) - (a < 0) ) }
def evaluateStack( s ):
    """Pop and evaluate the postfix expression on top of the list *s*.

    *s* is consumed from the end.  An operator pops its right operand and
    then its left operand; "PI" and "E" give the math constants; a name is
    looked up in ``variables`` first and otherwise called as a one-argument
    function from ``fn``; anything else is converted with ``float``.

    Returns 0.0 for an empty stack.
    Raises KeyError for a name that is neither a variable nor a function.
    """
    global variables
    if not s:
        return 0.0
    op = s.pop()
    if op in opn:
        # the right operand was pushed last, so it comes off first
        op2 = evaluateStack( s )
        op1 = evaluateStack( s )
        return opn[op]( op1, op2 )
    if op == "PI":
        return math.pi
    if op == "E":
        return math.e
    if op[0].isalpha():
        if op in variables:
            return variables[op]
        if op not in fn:
            # fail before consuming the stack, and say what was wrong
            raise KeyError( "unknown variable or function: %r" % (op,) )
        return fn[op]( evaluateStack( s ) )
    return float( op )

if __name__ == "__main__":

    def test( str ):
        """Parse *str* and print the input, parse results, stack and value.

        ``exprStack`` is reset before parsing.  The parameter name shadows
        the builtin ``str``; it is kept for compatibility.
        """
        global exprStack
        exprStack = []
        results = BNF().parseString( str )
        # Take the stack's text before evaluateStack() pops it empty;
        # the old print statement showed the stack before evaluating it.
        stack_text = "%s" % (exprStack,)
        value = evaluateStack( exprStack )
        print( "%s -> %s => %s = %s" % ( str, results, stack_text, value ) )

    for expression in (
            "9",
            "9 + 3 + 6",
            "9 + 3 / 11",
            "(9 + 3)",
            "(9+3) / 11",
            "9 - 12 - 6",
            "9 - (12 - 6)",
            "2*3.14159",
            "3.1415926535*3.1415926535 / 10",
            "PI * PI / 10",
            "PI*PI/10",
            "PI^2",
            "6.02E23 * 8.048",
            "e / 3",
            "sin(PI/2)",
            "trunc(E)",
            "E^PI",
            "2^3^2",
            "2^9",
            "sgn(-2)",
            "sgn(0)",
            "sgn(0.1)",
            "5*4+300/(5-2)*(6+4)+4",
            "((5*4+301)/(5-2))*(6+4)+4",
            "(321/3)*10+4",
            "# nothing but comments",
            "a = 2^10",
            "a^0.1 # same as 10th root of 1024",
            "c = a",
            "b=a",
            "b-c",
    ):
        test( expression )
 
G

Guy Robinson

Hi Paul,

Yep your examples :) I'm using this as a learning experience and have
looked at your code but I have specific requirements for integration
into another application.

I'm using the regex to create a list of tokens to be processed into a
postfix processing string. This is then offloaded to another class that
processes the string for each database row.

The speed to generate the postfix string isn't important. But the speed
to process it for each database row is.

Guy
I have the code below which parses an expression string and creates
tokens.

Can anyone suggest the best of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy

<snip>

Guy -

Well, I recognize the test cases from an example that I include with
pyparsing. Are you trying to add support for variables to that example? If
so, here is the example, modified to support assignments to variables.

-- Paul

============================
# minimath.py (formerly fourfn.py)
#
# Demonstration of the parsing module, implementing a simple 4-function
# expression parser, with support for scientific notation, and symbols for
# e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended to add variable assignment, storage, and evaluation, and
# Python-like comments.
#
# Copyright 2003,2004 by Paul McGuire
#
from pyparsing import (
    Literal, CaselessLiteral, Word, Combine, Group, Optional, ZeroOrMore,
    OneOrMore, Forward, nums, alphas, restOfLine, delimitedList,
)
import math

# Variable name -> value; filled in by assignVar().
variables = {}
# Postfix stack of operands, operators and names built by the parse actions.
exprStack = []

def pushFirst( str, loc, toks ):
    """Parse action: append the first matched token to ``exprStack``.

    *str* and *loc* are accepted but not used.  *toks* is returned as
    received; when it is empty nothing is pushed.
    """
    global exprStack
    if not toks:
        return toks
    first = toks[0]
    exprStack.append( first )
    return toks

def assignVar( str, loc, toks ):
    """Parse action for an assignment: store the value, then push the name.

    The expression currently on ``exprStack`` is evaluated (which empties
    it) and stored in ``variables`` under ``toks[0]``.  The name is then
    pushed via ``pushFirst`` so that evaluating the stack afterwards yields
    the assigned value.  Returns None.
    """
    global exprStack, variables
    value = evaluateStack( exprStack )
    variables[ toks[0] ] = value
    pushFirst( str, loc, toks )


bnf = None
def BNF():
    """Return the expression grammar, building it on the first call.

    The parser is cached in the module-level ``bnf`` and reused by later
    calls.  Parse actions push tokens onto ``exprStack`` in postfix order
    (operands first, then the operator or function name); ``assignVar``
    handles ``name = expr``.  Text from ``#`` to end of line is ignored.

    Precedence, tightest first: ``^`` (right-associative), ``*`` and ``/``,
    then ``+`` and ``-``.
    """
    global bnf
    if bnf is None:
        point = Literal( "." )
        e = CaselessLiteral( "E" )
        fnumber = Combine( Word( "+-"+nums, nums ) +
                           Optional( point + Optional( Word( nums ) ) ) +
                           Optional( e + Word( "+-"+nums, nums ) ) )
        ident = Word(alphas, alphas+nums+"_$")
        # dotted names such as obj.attribute, returned as one token
        varident = delimitedList(ident,".",combine=True)

        plus = Literal( "+" )
        minus = Literal( "-" )
        mult = Literal( "*" )
        div = Literal( "/" )
        lpar = Literal( "(" ).suppress()
        rpar = Literal( ")" ).suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal( "^" )
        pi = CaselessLiteral( "PI" )

        expr = Forward()
        factor = Forward()
        # NOTE(review): pi and e are tried before identifiers, so a name
        # starting with "pi" or "e" presumably matches the constant first -
        # confirm before relying on such names.
        atom = ( ( pi | e | fnumber | ident + lpar + expr + rpar |
                   varident ).setParseAction( pushFirst ) |
                 ( lpar + expr.suppress() + rpar ) )
        # The exponent operand is a factor, not a whole expr: with expr,
        # "2^3+1" was parsed as 2^(3+1).  Recursing on factor keeps "^"
        # right-associative, so "2^3^2" is still 2^(3^2).
        factor << atom + ZeroOrMore( ( expop + factor ).setParseAction(
            pushFirst ) )
        term = factor + ZeroOrMore( ( multop + factor ).setParseAction(
            pushFirst ) )
        expr << term + ZeroOrMore( ( addop + term ).setParseAction(
            pushFirst ) )
        assignment = (varident + "=" + expr).setParseAction( assignVar )

        bnf = Optional( assignment | expr )

        comment = "#" + restOfLine
        bnf.ignore(comment)

    return bnf

# map operator symbols to corresponding arithmetic operations
opn = { "+" : ( lambda a,b: a + b ),
        "-" : ( lambda a,b: a - b ),
        "*" : ( lambda a,b: a * b ),
        "/" : ( lambda a,b: a / b ),
        "^" : ( lambda a,b: a ** b ) }

# map function names to their one-argument implementations
fn = { "sin" : math.sin,
       "cos" : math.cos,
       "tan" : math.tan,
       "abs" : abs,
       "trunc" : ( lambda a: int(a) ),
       # floor(a + 0.5) rounds halves up for negative values as well;
       # int(a + 0.5) truncated toward zero and turned -0.6 into 0
       "round" : ( lambda a: int( math.floor( a + 0.5 ) ) ),
       # -1, 0 or 1 according to the sign of a
       "sgn" : ( lambda a: (a > 0) - (a < 0) ) }
def evaluateStack( s ):
    """Pop and evaluate the postfix expression on top of the list *s*.

    *s* is consumed from the end.  An operator pops its right operand and
    then its left operand; "PI" and "E" give the math constants; a name is
    looked up in ``variables`` first and otherwise called as a one-argument
    function from ``fn``; anything else is converted with ``float``.

    Returns 0.0 for an empty stack.
    Raises KeyError for a name that is neither a variable nor a function.
    """
    global variables
    if not s:
        return 0.0
    op = s.pop()
    if op in opn:
        # the right operand was pushed last, so it comes off first
        op2 = evaluateStack( s )
        op1 = evaluateStack( s )
        return opn[op]( op1, op2 )
    if op == "PI":
        return math.pi
    if op == "E":
        return math.e
    if op[0].isalpha():
        if op in variables:
            return variables[op]
        if op not in fn:
            # fail before consuming the stack, and say what was wrong
            raise KeyError( "unknown variable or function: %r" % (op,) )
        return fn[op]( evaluateStack( s ) )
    return float( op )

if __name__ == "__main__":

    def test( str ):
        """Parse *str* and print the input, parse results, stack and value.

        ``exprStack`` is reset before parsing.  The parameter name shadows
        the builtin ``str``; it is kept for compatibility.
        """
        global exprStack
        exprStack = []
        results = BNF().parseString( str )
        # Take the stack's text before evaluateStack() pops it empty;
        # the old print statement showed the stack before evaluating it.
        stack_text = "%s" % (exprStack,)
        value = evaluateStack( exprStack )
        print( "%s -> %s => %s = %s" % ( str, results, stack_text, value ) )

    for expression in (
            "9",
            "9 + 3 + 6",
            "9 + 3 / 11",
            "(9 + 3)",
            "(9+3) / 11",
            "9 - 12 - 6",
            "9 - (12 - 6)",
            "2*3.14159",
            "3.1415926535*3.1415926535 / 10",
            "PI * PI / 10",
            "PI*PI/10",
            "PI^2",
            "6.02E23 * 8.048",
            "e / 3",
            "sin(PI/2)",
            "trunc(E)",
            "E^PI",
            "2^3^2",
            "2^9",
            "sgn(-2)",
            "sgn(0)",
            "sgn(0.1)",
            "5*4+300/(5-2)*(6+4)+4",
            "((5*4+301)/(5-2))*(6+4)+4",
            "(321/3)*10+4",
            "# nothing but comments",
            "a = 2^10",
            "a^0.1 # same as 10th root of 1024",
            "c = a",
            "b=a",
            "b-c",
    ):
        test( expression )
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,769
Messages
2,569,580
Members
45,054
Latest member
TrimKetoBoost

Latest Threads

Top