Error checking using regex ?

Discussion in 'Python' started by Guy Robinson, Jun 8, 2004.

  1. Guy Robinson

    Guy Robinson Guest

    I have the code below which parses an expression string and creates tokens.

    Can anyone suggest the best way of doing error checking for things like:

    Valid variable only obj.attribute -whitespace allowed

    test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
    test( "ff*$24..55/ddr") #double .. and $ -invalid number
    test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

    I can't see an efficient way of doing this so any suggestions appreciated.

    TIA,

    Guy

    code:

    import re
    import time

    # Token patterns for the expression tokenizer.  They are joined into one
    # alternation below, in this order, so an earlier pattern wins whenever
    # several could match at the same position.  Raw strings keep the regex
    # escapes out of Python's own string-escape processing.
    re_par = r'[\(\)]'
    # The exponent marker must be a plain 'E': '\E' is not a valid regex escape
    # and makes re.compile() raise "bad escape" on current Python versions.
    re_num = r'[0-9]*\.?[0-9]+E?[0-9]*'
    re_opr = r'[\*\/\+\-\^]'
    re_cns = r'PI'
    re_trg = r'SIN|COS|TAN|ASIN|ACOS|ATAN|SGN'
    # NOTE(review): every part of this pattern is optional, so it can match the
    # empty string -- confirm that is acceptable for the split() in test().
    re_var = r'[a-z_0-9\s]*\.?[a-z_0-9\s]*'

    # One named group, so split() returns the matched tokens as well as the
    # text found between them.
    recom = re.compile(
        r'(?P<token>%s|%s|%s|%s|%s|%s)'
        % (re_par, re_num, re_opr, re_cns, re_trg, re_var),
        re.VERBOSE | re.IGNORECASE)

    def test(str):
        """Tokenize the expression *str* with ``recom`` and print the tokens.

        The string is split on the token pattern; every resulting piece is
        stripped of surrounding whitespace, blank pieces are dropped, and the
        remaining list is printed.  Nothing is returned.

        If tokenizing raises, the fixed message 'error of some kind' is printed
        instead of letting the exception propagate.

        The parameter name shadows the ``str`` builtin; it is kept so existing
        callers are unaffected.
        """
        try:
            pieces = [piece.strip() for piece in recom.split(str)]
            output = [piece for piece in pieces if piece != '']
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(output)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must still
            # be able to stop the script.
            print('error of some kind')

    class stopwatch:
        """Tiny wall-clock timer based on ``time.time()``.

        ``start()`` stores the current time in ``self.t``; ``stop()`` reports
        the seconds elapsed since then and resets ``self.t`` to 0.
        """

        def __init__(self):
            """Create the timer; ``self.t`` is not set until ``start()`` runs."""

        def start(self):
            """Remember the current time and return a status message."""
            self.t = time.time()
            return 'starting timer'

        def stop(self):
            """Return a message with the elapsed seconds, then reset ``self.t``."""
            elapsed = time.time() - self.t
            message = 'stopped at %f seconds' % (elapsed,)
            self.t = 0
            return message

    # Driver: time a run of the tokenizer over the sample expressions.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    e = stopwatch()
    print(e.start())
    test( "9" )
    test( "9 + 3 + 6" )
    test( "9 + 3 / 11" )
    test( "( 9 + 3)" )
    test( "(9+3) / 11" )
    test( "9 - 12 - 6" )
    test( "-9 - (12 - 6)" )
    test( "2*3.14159" )
    test( "3.1415926535*3.1415926535 / 10" )
    test( "PI * PI / 10" )
    test( "PI*PI/10" )
    test( "PI^2" )
    test( "6.02E23 * 8.048" )
    test( "sin(PI/2)" )
    test( "2^3^2" )
    test( "2^9" )
    test( "sgn(-2)" )
    test( "sgn(0)" )
    test( "sgn(0.1)" )
    test( "ff*2" )
    test( "ff*g g/2" )
    test( "ff*2/dd.r r")
    test( "5*4+300/(5-2)*(6+4)+4" )
    test( "((5*4+300)/(5-2))*(6+4)+4" )
    test( "(320/3)*10+4" )

    #now test error expressions

    # additional ..ss and whitespace -invalid variable
    test( "ff*2/dd.r..ss r")
    # double .. -invalid number
    test( "ff*$24..55/ddr")
    # variable with double . -invalid variable
    test( "ff*2/dd.r.ss r")
    # no closing parentheses -to be tested when evaluating expression
    #test( "ff*((w.w+3)-2")

    print(e.stop())
     
    Guy Robinson, Jun 8, 2004
    #1
    1. Advertising

  2. Am Dienstag, 8. Juni 2004 13:26 schrieb Guy Robinson:
    > I have the code below which parses an expression string and creates tokens.


    You cannot parse expressions using regular expressions, nor can you check
    them for errors: the class of languages that regular expressions describe is
    not "intelligent" enough to match braces. (Any introductory book on automata
    or complexity theory covers this: you need a machine with extra state, such
    as a deterministic stack machine, to check for matching braces.)

    Your best bet for checking an expression, and also for parsing it, is to
    write a context-free grammar for your syntax, try to parse the string you
    are evaluating, and, if parsing fails, report that the expression is
    invalid. If you are parsing Python expressions, your best bet is to call the
    built-in compile() function (which creates a code object from a Python
    expression; the code object can then be run with exec or eval).

    HTH!

    Heiko.
     
    Heiko Wundram, Jun 8, 2004
    #2
    1. Advertising

  3. Guy Robinson

    Paul McGuire Guest

    "Guy Robinson" <-e-d.co.nz> wrote in message
    news:ca47pc$e11$...
    > I have the code below which parses an expression string and creates

    tokens.
    >
    > Can anyone suggest the best of error checking for things like:
    >
    > Valid variable only obj.attribute -whitespace allowed
    >
    > test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
    > test( "ff*$24..55/ddr") #double .. and $ -invalid number
    > test( "ff*2/dd.r.ss r") #variable with double . -invalid variable
    >
    > I can't see an efficient way of doing this so any suggestions appreciated.
    >
    > TIA,
    >
    > Guy
    >

    <snip>

    Guy -

    Well, I recognize the test cases from an example that I include with
    pyparsing. Are you trying to add support for variables to that example? If
    so, here is the example, modified to support assignments to variables.

    -- Paul

    ============================
    # minimath.py (formerly fourfn.py)
    #
    # Demonstration of the parsing module, implementing a simple 4-function
    # expression parser, with support for scientific notation, and symbols
    # for e and pi.
    # Extended to add exponentiation and simple built-in functions.
    # Extended to add variable assignment, storage, and evaluation, and
    # Python-like comments.
    #
    # Copyright 2003,2004 by Paul McGuire
    #
    import math

    from pyparsing import (
        Literal, CaselessLiteral, Word, Combine, Group, Optional, ZeroOrMore,
        OneOrMore, Forward, nums, alphas, restOfLine, delimitedList)

    # Variable name -> value.  Written by assignVar(), read by evaluateStack().
    variables = {}
    # Tokens recorded by the parse actions (pushFirst appends to it);
    # evaluateStack() pops from the end of this list.
    exprStack = []

    def pushFirst( str, loc, toks ):
        """Parse action: append the first matched token to ``exprStack``.

        ``str`` and ``loc`` are accepted but not used.  When ``toks`` is
        empty nothing is recorded.  ``toks`` is returned unchanged either way.
        """
        if not toks:
            return toks
        # Only mutation, no rebinding, so no ``global`` declaration is needed.
        exprStack.append( toks[0] )
        return toks

    def assignVar( str, loc, toks ):
        """Parse action for ``name = expr``: evaluate and store the value.

        Evaluates whatever is currently on ``exprStack``, stores the result in
        ``variables`` under the first token (the variable name), then pushes
        that name via ``pushFirst``.  Returns None.
        """
        # Evaluate first, exactly as the one-line assignment did: the
        # right-hand side runs before the subscript target is looked up.
        value = evaluateStack( exprStack )
        variables[ toks[0] ] = value
        pushFirst( str, loc, toks )


    bnf = None
    def BNF():
        """Build the calculator grammar on first use and return it.

        The grammar is cached in the module-level ``bnf``, so later calls
        return the same object.  It accepts an optional assignment
        (``name = expr``) or a bare expression, and ignores ``#`` comments.
        Parse actions (``pushFirst``, ``assignVar``) record tokens on
        ``exprStack`` as a side effect of parsing.

        Grammar, loosest binding first:
            expr   :: term   [ addop  term   ]*
            term   :: factor [ multop factor ]*
            factor :: atom   [ expop  factor ]*
            atom   :: PI | E | number | ident '(' expr ')' | varident
                      | '(' expr ')'
        """
        global bnf
        if not bnf:
            point = Literal( "." )
            e = CaselessLiteral( "E" )
            fnumber = Combine( Word( "+-"+nums, nums ) +
                               Optional( point + Optional( Word( nums ) ) ) +
                               Optional( e + Word( "+-"+nums, nums ) ) )
            ident = Word(alphas, alphas+nums+"_$")
            # dotted names such as obj.attribute, kept as a single token
            varident = delimitedList(ident,".",combine=True)

            plus = Literal( "+" )
            minus = Literal( "-" )
            mult = Literal( "*" )
            div = Literal( "/" )
            lpar = Literal( "(" ).suppress()
            rpar = Literal( ")" ).suppress()
            addop = plus | minus
            multop = mult | div
            expop = Literal( "^" )
            pi = CaselessLiteral( "PI" )

            expr = Forward()
            # NOTE(review): "pi" and "e" are tried before identifiers, so a
            # name that merely starts with those letters may be mis-tokenized
            # -- confirm against the pyparsing version in use.
            atom = ( ( pi | e | fnumber | ident + lpar + expr + rpar |
                       varident ).setParseAction( pushFirst ) |
                     ( lpar + expr.suppress() + rpar ) )
            # The exponent's right operand must be a factor, not a whole expr:
            # with expr, "2^3+1" would group as 2^(3+1).  Recursing on factor
            # keeps "^" right-associative ("2^3^2" is 2^(3^2)) while binding
            # tighter than "*", "/", "+" and "-".
            factor = Forward()
            factor << atom + ZeroOrMore( ( expop + factor ).setParseAction(
                pushFirst ) )
            term = factor + ZeroOrMore( ( multop + factor ).setParseAction(
                pushFirst ) )
            expr << term + ZeroOrMore( ( addop + term ).setParseAction(
                pushFirst ) )
            assignment = (varident + "=" + expr).setParseAction( assignVar )

            bnf = Optional( assignment | expr )

            comment = "#" + restOfLine
            bnf.ignore(comment)

        return bnf

    # map operator symbols to corresponding arithmetic operations
    opn = { "+" : ( lambda a,b: a + b ),
            "-" : ( lambda a,b: a - b ),
            "*" : ( lambda a,b: a * b ),
            "/" : ( lambda a,b: a / b ),
            "^" : ( lambda a,b: a ** b ) }
    # map function names to their one-argument implementations
    fn = { "sin" : math.sin,
           "cos" : math.cos,
           "tan" : math.tan,
           "abs" : abs,
           "trunc" : ( lambda a: int(a) ),
           # Round half up.  floor() is required because int() truncates
           # toward zero, which would turn -2.7 into -2 instead of -3.
           "round" : ( lambda a: int(math.floor(a + 0.5)) ),
           # -1, 0 or 1 according to the sign of a
           "sgn" : ( lambda a: -1 if a < 0 else (1 if a > 0 else 0) ) }
    def evaluateStack( s ):
        """Evaluate the token stack *s* recursively and return the result.

        Tokens are popped from the end of *s*, so the list is consumed.
        An empty stack evaluates to 0.0.

        - A binary operator (a key of ``opn``) pops its right operand first,
          then its left operand.
        - "PI" and "E" evaluate to the math-module constants.
        - Any other token starting with a letter is looked up in ``variables``
          first, then treated as a function from ``fn`` applied to the next
          value on the stack.
        - Everything else is converted with ``float()``.

        Raises KeyError for a name that is neither a variable nor a function,
        and ValueError (from ``float``) for a malformed number.
        """
        if not s:
            return 0.0
        op = s.pop()
        # Exact key lookup: a substring test against "+-*/^" would also
        # accept multi-character fragments such as "+-".
        if op in opn:
            op2 = evaluateStack( s )
            op1 = evaluateStack( s )
            return opn[op]( op1, op2 )
        if op == "PI":
            return math.pi
        if op == "E":
            return math.e
        if op[0].isalpha():
            if op in variables:
                return variables[op]
            if op not in fn:
                # Fail before consuming an operand that does not belong to
                # this name, and say which name was the problem.
                raise KeyError( "unknown variable or function: %r" % (op,) )
            return fn[op]( evaluateStack( s ) )
        return float( op )

    if __name__ == "__main__":

        def test( str ):
            """Parse and evaluate *str*, then print one summary line.

            The line has the form
            ``<input> -> <parse results> => <token stack> = <value>``.
            ``exprStack`` is reset before parsing.
            """
            global exprStack
            exprStack = []
            results = BNF().parseString( str )
            # evaluateStack() pops the stack empty, so copy it first in order
            # to print the stack the parser actually produced.
            stack = exprStack[:]
            value = evaluateStack( exprStack )
            # One pre-formatted argument prints identically on Python 2 and 3.
            print("%s -> %s => %s = %s" % (str, results, stack, value))

        test( "9" )
        test( "9 + 3 + 6" )
        test( "9 + 3 / 11" )
        test( "(9 + 3)" )
        test( "(9+3) / 11" )
        test( "9 - 12 - 6" )
        test( "9 - (12 - 6)" )
        test( "2*3.14159" )
        test( "3.1415926535*3.1415926535 / 10" )
        test( "PI * PI / 10" )
        test( "PI*PI/10" )
        test( "PI^2" )
        test( "6.02E23 * 8.048" )
        test( "e / 3" )
        test( "sin(PI/2)" )
        test( "trunc(E)" )
        test( "E^PI" )
        test( "2^3^2" )
        test( "2^9" )
        test( "sgn(-2)" )
        test( "sgn(0)" )
        test( "sgn(0.1)" )
        test( "5*4+300/(5-2)*(6+4)+4" )
        test( "((5*4+301)/(5-2))*(6+4)+4" )
        test( "(321/3)*10+4" )
        test( "# nothing but comments" )
        test( "a = 2^10" )
        test( "a^0.1 # same as 10th root of 1024" )
        test( "c = a" )
        test( "b=a" )
        test( "b-c" )
     
    Paul McGuire, Jun 8, 2004
    #3
  4. Guy Robinson

    Guy Robinson Guest

    Hi Paul,

    Yep your examples :) I'm using this as a learning experience and have
    looked at your code but I have specific requirements for integration
    into another application.

    I'm using the regex to create a list of tokens to be processed into a
    postfix processing string. This is then offloaded to another class that
    processes the string for each database row.

    The speed to generate the postfix string isn't important. But the speed
    to process it for each database row is.

    Guy

    > "Guy Robinson" <-e-d.co.nz> wrote in message
    > news:ca47pc$e11$...
    >
    >>I have the code below which parses an expression string and creates

    >
    > tokens.
    >
    >>Can anyone suggest the best of error checking for things like:
    >>
    >>Valid variable only obj.attribute -whitespace allowed
    >>
    >>test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
    >>test( "ff*$24..55/ddr") #double .. and $ -invalid number
    >>test( "ff*2/dd.r.ss r") #variable with double . -invalid variable
    >>
    >>I can't see an efficient way of doing this so any suggestions appreciated.
    >>
    >>TIA,
    >>
    >>Guy
    >>

    >
    > <snip>
    >
    > Guy -
    >
    > Well, I recognize the test cases from an example that I include with
    > pyparsing. Are you trying to add support for variables to that example? If
    > so, here is the example, modified to support assignments to variables.
    >
    > -- Paul
    >
    > ============================
    > # minimath.py (formerly fourfn.py)
    > #
    > # Demonstration of the parsing module, implementing a simple 4-function
    > expression parser,
    > # with support for scientific notation, and symbols for e and pi.
    > # Extended to add exponentiation and simple built-in functions.
    > # Extended to add variable assignment, storage, and evaluation, and
    > Python-like comments.
    > #
    > # Copyright 2003,2004 by Paul McGuire
    > #
    > from pyparsing import
    > Literal,CaselessLiteral,Word,Combine,Group,Optional,ZeroOrMore,OneOrMore,For
    > ward,nums,alphas,restOfLine,delimitedList
    > import math
    >
    > variables = {}
    > exprStack = []
    >
    > def pushFirst( str, loc, toks ):
    > global exprStack
    > if toks:
    > exprStack.append( toks[0] )
    > return toks
    >
    > def assignVar( str, loc, toks ):
    > global exprStack
    > global variables
    > variables[ toks[0] ] = evaluateStack( exprStack )
    > pushFirst(str,loc,toks)
    >
    >
    > bnf = None
    > def BNF():
    > global bnf
    > if not bnf:
    > point = Literal( "." )
    > e = CaselessLiteral( "E" )
    > fnumber = Combine( Word( "+-"+nums, nums ) +
    > Optional( point + Optional( Word( nums ) ) ) +
    > Optional( e + Word( "+-"+nums, nums ) ) )
    > ident = Word(alphas, alphas+nums+"_$")
    > varident = delimitedList(ident,".",combine=True)
    >
    > plus = Literal( "+" )
    > minus = Literal( "-" )
    > mult = Literal( "*" )
    > div = Literal( "/" )
    > lpar = Literal( "(" ).suppress()
    > rpar = Literal( ")" ).suppress()
    > addop = plus | minus
    > multop = mult | div
    > expop = Literal( "^" )
    > pi = CaselessLiteral( "PI" )
    >
    > expr = Forward()
    > atom = ( pi | e | fnumber | ident + lpar + expr + rpar |
    > varident ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )
    > factor = atom + ZeroOrMore( ( expop + expr ).setParseAction(
    > pushFirst ) )
    > term = factor + ZeroOrMore( ( multop + factor ).setParseAction(
    > pushFirst ) )
    > expr << term + ZeroOrMore( ( addop + term ).setParseAction(
    > pushFirst ) )
    > assignment = (varident + "=" + expr).setParseAction( assignVar )
    >
    > bnf = Optional( assignment | expr )
    >
    > comment = "#" + restOfLine
    > bnf.ignore(comment)
    >
    > return bnf
    >
    > # map operator symbols to corresponding arithmetic operations
    > opn = { "+" : ( lambda a,b: a + b ),
    > "-" : ( lambda a,b: a - b ),
    > "*" : ( lambda a,b: a * b ),
    > "/" : ( lambda a,b: a / b ),
    > "^" : ( lambda a,b: a ** b ) }
    > fn = { "sin" : math.sin,
    > "cos" : math.cos,
    > "tan" : math.tan,
    > "abs" : abs,
    > "trunc" : ( lambda a: int(a) ),
    > "round" : ( lambda a: int(a+0.5) ),
    > "sgn" : ( lambda a: ( (a<0 and -1) or (a>0 and 1) or 0 ) ) }
    > def evaluateStack( s ):
    > global variables
    > if not s: return 0.0
    > op = s.pop()
    > if op in "+-*/^":
    > op2 = evaluateStack( s )
    > op1 = evaluateStack( s )
    > return opn[op]( op1, op2 )
    > elif op == "PI":
    > return 3.1415926535
    > elif op == "E":
    > return 2.718281828
    > elif op[0].isalpha():
    > if op in variables:
    > return variables[op]
    > fnarg = evaluateStack( s )
    > return (fn[op])( fnarg )
    > else:
    > return float( op )
    >
    > if __name__ == "__main__":
    >
    > def test( str ):
    > global exprStack
    > exprStack = []
    > results = BNF().parseString( str )
    > print str, "->", results, "=>", exprStack, "=", evaluateStack(
    > exprStack )
    >
    > test( "9" )
    > test( "9 + 3 + 6" )
    > test( "9 + 3 / 11" )
    > test( "(9 + 3)" )
    > test( "(9+3) / 11" )
    > test( "9 - 12 - 6" )
    > test( "9 - (12 - 6)" )
    > test( "2*3.14159" )
    > test( "3.1415926535*3.1415926535 / 10" )
    > test( "PI * PI / 10" )
    > test( "PI*PI/10" )
    > test( "PI^2" )
    > test( "6.02E23 * 8.048" )
    > test( "e / 3" )
    > test( "sin(PI/2)" )
    > test( "trunc(E)" )
    > test( "E^PI" )
    > test( "2^3^2" )
    > test( "2^9" )
    > test( "sgn(-2)" )
    > test( "sgn(0)" )
    > test( "sgn(0.1)" )
    > test( "5*4+300/(5-2)*(6+4)+4" )
    > test( "((5*4+301)/(5-2))*(6+4)+4" )
    > test( "(321/3)*10+4" )
    > test( "# nothing but comments" )
    > test( "a = 2^10" )
    > test( "a^0.1 # same as 10th root of 1024" )
    > test( "c = a" )
    > test( "b=a" )
    > test( "b-c" )
    >
    >
     
    Guy Robinson, Jun 8, 2004
    #4
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. =?Utf-8?B?SmViQnVzaGVsbA==?=

    Is ASP Validator Regex Engine Same As VS2003 Find Regex Engine?

    =?Utf-8?B?SmViQnVzaGVsbA==?=, Oct 22, 2005, in forum: ASP .Net
    Replies:
    2
    Views:
    745
    =?Utf-8?B?SmViQnVzaGVsbA==?=
    Oct 22, 2005
  2. Rick Venter

    perl regex to java regex

    Rick Venter, Oct 29, 2003, in forum: Java
    Replies:
    5
    Views:
    1,693
    Ant...
    Nov 6, 2003
  3. Replies:
    2
    Views:
    629
  4. Xah Lee
    Replies:
    1
    Views:
    972
    Ilias Lazaridis
    Sep 22, 2006
  5. Replies:
    3
    Views:
    834
    Reedick, Andrew
    Jul 1, 2008
Loading...

Share This Page