Extracting functions from C/C++ using Perl, would like Code Review Help if possible

Discussion in 'Perl Misc' started by sln@netherlands.com, Feb 6, 2009.

  1. Guest

    First installment.
    This was inspired by some other post on here.
    I was wondering if I could get a review of my preliminary.
    I need constructive critque's.

    Thank you!
    - sln

    ## ===============================================
    ## C_FunctionParser_v1.pl
    ## -------------------------------
    ## C/C++ Style Function Parser
    ## Idea - To parse out C/C++ style functions
    ## that have parenthetical closures (some don't).
    ## (Could be a package some day, dunno, maybe ..)
    ## - sln *** @ 2/6/09
    ## ===============================================
    my $VERSION = 1.0;
    $|=1;

    use strict;
    use warnings;

    # Prototype's
    sub Find_Function(\$\@);

    # File-scoped variables
    my ($FxParse,$FName,$Preamble);

    # Set default function name
    SetFunctionName();

    ## ----------------------
    ## Main (user play area)
    ## ----------------------

    # Source file
    my $Source = join '', <DATA>;

    # Extended, possibly non-compliant, function name - pattern examples:
    # SetFunctionName(qr/_T/);
    # SetFunctionName(qr/\(\s*void\s*\)\s*function/);
    # SetFunctionName("\\(\\s*void\\s*\\)\\s*function");


    # Parse some functions
    # func ...
    my @Funct = ();
    Find_Function($Source, @Funct);
    # func2 ...
    my @Funct2 = ();
    SetFunctionName(qr/_T/);
    Find_Function($Source, @Funct2);


    # Print @Funct functions found
    # Note that segments can be modified and collated.
    if (!@Funct) {
    print "Function name pattern: '$FName' not found!\n";
    } else {
    print "\nFound ".@Funct." matches.\nFunction pattern: '$FName' \n";
    }
    for my $ref (@Funct) {
    printf "\n\@: %6d - %s\n", $$ref[3], substr($Source, $$ref[0], $$ref[2] - $$ref[0]);
    }

    ## ----------
    ## End Main
    ## ----------


    # ---------------------------------------------------------

    # Set the parser's function regex pattern
    #
    sub SetFunctionName
    {
    if (!@_) {
    $FName = "_*[a-zA-Z][\\w]*"; # Matches all compliant function names (default)
    } else {
    $FName = shift;
    }
    $Preamble = "\\s*\\(";

    # Compile function parser regular expression
    # Regex condensed:
    # $FxParse = qr!/{2}.*?\n|/\*.*?\*/|\\.|'["()]'|(")|($FName$Preamble)|(\()|(\))!s;
    # | | |1 1|2 2|3 3|4 4
    # Note - Non-Captured, matching items, are meant to consume!
    # -----------------------------------------------------------
    # Regex /xpanded (with commentary):
    $FxParse = # Regex Precedence (items MUST be in this order):
    qr! # -----------------------------------------------
    /{2}.*?\n | # comment - // + anything + end of line
    /\*.*?\*/ | # comment - /* + anything + */
    \\. | # escaped char - backslash + ANY character
    '["()]' | # single quote char - quote then one of ", (, or ), then quote
    (") | # capture $1 - double quote as a flag
    ($FName$Preamble) | # capture $2 - $FName + $Preamble
    (\() | # capture $3 - ( as a flag
    (\)) # capture $4 - ) as a flag
    !xs;
    }

    # Recursive procedure that finds C/C++ style functions
    # (the engine)
    # Notes:
    # - This is not a syntax checker !!!
    # - Nested functions are found recursively, but the search is still streamed and single pass.
    # - Parenthetical closures are determined via counter that persists in recursion.
    # - This precedence avoids all ambigous paranthetical open/close conditions:
    # 1. Dual comment styles.
    # 2. Escapes.
    # 3. Single quoted characters.
    # 4. Double quotes, fip-flopped to determine closure.
    # - Improper closures are reported, index removed from the stack and processing continues
    # (this would be a syntax error, ie: the code won't complie, but it is reported as a closure error).
    #
    sub Find_Function(\$\@)
    {
    my ($src,$Funct,$offset,$pos,$Ndx,$Lines) = @_;

    my ($closure,$dquotes,$aref,$misc) = (1,0,[],1);
    $pos = 0 if (!defined $pos);
    $offset = 0 if (!defined $offset);
    $Ndx = $aref if (!defined $Ndx);
    $Lines = \$misc if (!defined $Lines);
    pos($$src) = $pos;

    while ($$src =~ /$FxParse/gc)
    {
    if (defined $1) # double quote "
    {
    $dquotes = !$dquotes;
    }
    next if ($dquotes);

    if (defined $2) # 'function name'
    {
    # ------------------------------------
    # Placeholder for exclusions......
    # ------------------------------------

    # Cache the current function index
    push @$Ndx, scalar(@$Funct);
    my ($funcpos, $parampos) = ( $-[0], pos($$src) );

    # Get newlines since last function
    $$Lines += substr ($$src, $offset, $funcpos - $offset) =~ tr/\n//;
    # print $$Lines,"\n";

    # Save positions: function( parms )
    push @$Funct , [$funcpos, $parampos, 0, $$Lines];

    # Recurse this procedure
    ($offset, pos($$src)) = &Find_Function ( $src, $Funct, $funcpos, $parampos, $Ndx, $Lines );
    }
    elsif (defined $3) # '('
    {
    ++$closure;
    }
    elsif (defined $4) # ')'
    {
    --$closure;
    if ($closure <= 0)
    {
    $closure = 0;
    if (@$Ndx)
    {
    # Pop index stack, assign closure, return function/closure positions
    $$Funct[pop @$Ndx][2] = pos($$src);
    return ($offset, pos($$src));
    }
    }
    }
    }

    # To test an error, either take off the closure of a function in its source,
    # or force it this way (but is pseudo error, make sure you have data in @$Funct):
    # push @$Ndx, 1;

    # Should only get here once.
    # Its an error if index stack has elements
    if (@$Ndx)
    {
    ## BAD RETURN ... take this one off, try to recover
    my $func_index = pop @$Ndx;
    my $ref = $$Funct[$func_index];
    $$ref[2] = $$ref[1];
    print STDERR "** Bad return, index = $func_index\n";
    print "** Error! Unclosed function [$func_index], line ".
    $$ref[3].": '".substr ($$src, $$ref[0], $$ref[2] - $$ref[0] )."'\n";
    }
    return ($offset, pos($$src));
    }

    __DATA__
     
    , Feb 6, 2009
    #1
    1. Advertising

  2. Guest

    On Fri, 06 Feb 2009 23:09:47 GMT, wrote:

    >First installment.
    >This was inspired by some other post on here.
    >I was wondering if I could get a review of my preliminary.
    >I need constructive critque's.
    >
    >Thank you!
    >- sln
    >
    >## ===============================================
    >## C_FunctionParser_v1.pl
    >## -------------------------------

    [snip]

    Version 2 - same as v1 except the recursion was taken out of
    Find_Function(). This speeds it up and was not really needed.

    Don't worry I won't be posting meanial fixes, this should have
    been the initial post and just trying to correct the base.

    So version 3 will have significant modifications.
    Mods like exclusions for language intrinsics (for,if,while,case,etc..),
    distinctions for typedefs, macros, prototypes, class declarations, methods,
    and expanding the parametric position data like pre/post line feeds and nesting
    information.

    I won't post any more of this unless it turns out to be a bit more usefull then
    it is right now. If anything it will be a couple of weeks, if it doesen't pan out
    then none at all. There is probably modules that do it all and this is a waste of time.
    But, any helpfull comments on the code or what its doing are welcome.

    - sln

    ## ===============================================
    ## C_FunctionParser_v2.pl @ 2/7/09
    ## -------------------------------
    ## C/C++ Style Function Parser
    ## Idea - To parse out C/C++ style functions
    ## that have parenthetical closures (some don't).
    ## (Could be a package some day, dunno, maybe ..)
    ## - sln
    ## ===============================================
    my $VERSION = 2.0;
    $|=1;

    use strict;
    use warnings;

    # Prototype's
    sub Find_Function(\$\@);

    # File-scoped variables
    my ($FxParse,$FName,$Preamble);

    # Set default function name
    SetFunctionName();

    ## --------
    ## Main
    ## --------

    # Source file
    my $Source = join '', <DATA>;

    # Extended, possibly non-compliant, function name - pattern examples:
    # SetFunctionName(qr/_T/);
    # SetFunctionName(qr/\(\s*void\s*\)\s*function/);
    # SetFunctionName("\\(\\s*void\\s*\\)\\s*function");

    # Parse some functions
    # func ...
    my @Funct = ();
    Find_Function($Source, @Funct);
    # func2 ...
    my @Funct2 = ();
    SetFunctionName(qr/_T/);
    Find_Function($Source, @Funct2);

    # Print @Funct functions found
    # Note that segments can be modified and collated.
    if (!@Funct) {
    print "Function name pattern: '$FName' not found!\n";
    } else {
    print "\nFound ".@Funct." matches.\nFunction pattern: '$FName' \n";
    }
    for my $ref (@Funct) {
    printf "\n\@: %6d - %s\n", $$ref[3], substr($Source, $$ref[0], $$ref[2] - $$ref[0]);
    }

    ## ----------
    ## End
    ## ----------


    # ---------------------------------------------------------

    # Set the parser's function regex pattern
    #
    sub SetFunctionName
    {
    if (!@_) {
    $FName = "_*[a-zA-Z][\\w]*"; # Matches all compliant function names (default)
    } else {
    $FName = shift;
    }
    $Preamble = "\\s*\\(";

    # Compile function parser regular expression
    # Regex condensed:
    # $FxParse = qr!/{2}.*?\n|/\*.*?\*/|\\.|'["()]'|(")|($FName$Preamble)|(\()|(\))!s;
    # | | |1 1|2 2|3 3|4 4
    # Note - Non-Captured, matching items, are meant to consume!
    # -----------------------------------------------------------
    # Regex /xpanded (with commentary):
    $FxParse = # Regex Precedence (items MUST be in this order):
    qr! # -----------------------------------------------
    /{2}.*?\n | # comment - // + anything + end of line
    /\*.*?\*/ | # comment - /* + anything + */
    \\. | # escaped char - backslash + ANY character
    '["()]' | # single quote char - quote then one of ", (, or ), then quote
    (") | # capture $1 - double quote as a flag
    ($FName$Preamble) | # capture $2 - $FName + $Preamble
    (\() | # capture $3 - ( as a flag
    (\)) # capture $4 - ) as a flag
    !xs;
    }

    # Procedure that finds C/C++ style functions
    # (the engine)
    # Notes:
    # - This is not a syntax checker !!!
    # - Nested functions index and closure are cached. The search is single pass.
    # - Parenthetical closures are determined via cached counter.
    # - This precedence avoids all ambigous paranthetical open/close conditions:
    # 1. Dual comment styles.
    # 2. Escapes.
    # 3. Single quoted characters.
    # 4. Double quotes, fip-flopped to determine closure.
    # - Improper closures are reported, with the last one reliably being the likely culprit
    # (this would be a syntax error, ie: the code won't complie, but it is reported as a closure error).
    #
    sub Find_Function(\$\@)
    {
    my ($src,$Funct) = @_;
    my @Ndx = ();
    my @Closure = ();
    my ($Lines,$offset,$closure,$dquotes) = (1,0,0,0);

    while ($$src =~ /$FxParse/g)
    {
    if (defined $1) # double quote "
    {
    $dquotes = !$dquotes;
    }
    next if ($dquotes);

    if (defined $2) # 'function name'
    {
    # ------------------------------------
    # Placeholder for exclusions......
    # ------------------------------------

    # Cache the current function index and current closure
    push @Ndx, scalar(@$Funct);
    push @Closure, $closure;

    my ($funcpos, $parampos) = ( $-[0], pos($$src) );

    # Get newlines since last function
    $Lines += substr ($$src, $offset, $funcpos - $offset) =~ tr/\n//;
    # print $Lines,"\n";

    # Save positions: function( parms )
    push @$Funct , [$funcpos, $parampos, 0, $Lines];

    # Asign new offset
    $offset = $funcpos;
    # Closure is now 1 because of preamble '('
    $closure = 1;
    }
    elsif (defined $3) # '('
    {
    ++$closure;
    }
    elsif (defined $4) # ')'
    {
    --$closure;
    if ($closure <= 0)
    {
    $closure = 0;
    if (@Ndx)
    {
    # Pop index and closure, store position
    $$Funct[pop @Ndx][2] = pos($$src);
    $closure = pop @Closure;
    }
    }
    }
    }

    # To test an error, either take off the closure of a function in its source,
    # or force it this way (pseudo error, make sure you have data in @$Funct):
    # push @Ndx, 1;

    # Its an error if index stack has elements.
    # The last one reported is the likely culprit.
    if (@Ndx)
    {
    ## BAD RETURN ...
    ## All elements in stack have to be fixed up
    while (@Ndx) {
    my $func_index = shift @Ndx;
    my $ref = $$Funct[$func_index];
    $$ref[2] = $$ref[1];
    print STDERR "** Bad return, index = $func_index\n";
    print "** Error! Unclosed function [$func_index], line ".
    $$ref[3].": '".substr ($$src, $$ref[0], $$ref[2] - $$ref[0] )."'\n";
    }
    return 0;
    }
    return 1
    }

    __DATA__
     
    , Feb 7, 2009
    #2
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Alex
    Replies:
    0
    Views:
    411
  2. Dheeraj Kumar
    Replies:
    2
    Views:
    344
    Frank Schmitt
    Sep 24, 2003
  3. Kevin Wan
    Replies:
    5
    Views:
    740
    Kevin Wan
    Jan 17, 2007
  4. www
    Replies:
    51
    Views:
    1,501
  5. Tricky
    Replies:
    4
    Views:
    687
    JimLewis
    Sep 21, 2010
Loading...

Share This Page