J
jacob navia
Hi guys
I have written this small parser to print out the functions defined in a
C file. This is an example of parsing in C, that I want to add to my
tutorial. Comments (and bug reports) are welcome.
-------------------------------------------------------------cut here
/* A simple scanner that will take a file of C source code and
print the names of all functions therein, in the following format:
"Function XXXX found line dddd .... ddddd"
Algorithm. It scans for a terminating parentheses and an immediately
following opening brace. Comments can appear between the closing
paren and the opening braces, but no other characters besides white
space. Functions must have the correct prototype, K & R syntax
is not supported.
*/
#include <stdio.h>
#define MAXID 1024 // Longest Identifier we support. Sorry
// Java guys...
static char IdBuffer[MAXID]; // Buffer for remembering the function name
static int line = 1; // We start at line 1
// This function reads a character and if
// it is \n it bumps the line counter
static int Fgetc(FILE *f)
{
int c = fgetc(f);
if (c == '\n')
line++;
return c;
}
// Return 1 if the character is a legal C identifier
// character, zero if not. The parameter "start"
// means if an identifier START character
// (numbers) is desired.
static int IsIdentifier(int c,int start)
{
if (c >= 'a' && c <= 'z')
return 1;
if (c >= 'A' && c <= 'Z')
return 1;
if (start == 0 && c >= '0' && c <= '9')
return 1;
if (c == '_')
return 1;
return 0;
}
// Just prints the function name
static int PrintFunction(FILE *f)
{
printf("Function %s found line %d ...",IdBuffer,line);
return Fgetc(f);
}
// Reads a global identifier into our name buffer
static int ReadId(char c,FILE *f)
{
int i = 1;
IdBuffer[0] = c;
while (i < MAXID-1) {
c = Fgetc(f);
if (c != EOF) {
if (IsIdentifier(c,0))
IdBuffer[i++] = c;
else break;
}
else break;
}
IdBuffer = 0;
return c;
}
static int ParseString(FILE *f) // Skips strings
{
int c = Fgetc(f);
while (c != EOF && c != '"') {
if (c == '\\')
c = Fgetc(f);
if (c != EOF)
c = Fgetc(f);
}
if (c == '"')
c = Fgetc(f);
return c;
}
static int ParseComment(FILE *f) // Skips comments
{
int c = Fgetc(f);
restart:
while (c != '*') {
c = Fgetc(f);
if (c == EOF)
return EOF;
}
c = Fgetc(f);
if (c == '/')
return Fgetc(f);
else goto restart;
}
static int ParseCppComment(FILE *f) // Skips // comments
{
int c = Fgetc(f);
while (c != EOF && c != '\n') {
if (c == '\\')
c = Fgetc(f);
if (c != EOF)
c = Fgetc(f);
}
if (c == '\n')
c = Fgetc(f);
return c;
}
// Skips white space and comments
static int SkipWhiteSpace(int c,FILE *f) {
if (c > ' ')
return c;
while (c <= ' ') {
c = Fgetc(f);
if (c == '/') {
c = Fgetc(f);
if (c == '*')
c = ParseComment(f);
else if (c == '/')
c = ParseCppComment(f);
}
}
return c;
}
// Skips chars between simple quotes
static int ParseQuotedChar(FILE *f)
{
int c = Fgetc(f);
while (c != EOF && c != '\'') {
if (c == '\\')
c = Fgetc(f);
if (c != EOF)
c = Fgetc(f);
}
if (c == '\'')
c = Fgetc(f);
return c;
}
int main(int argc,char *argv[])
{
if (argc == 1) {
printf("Usage: %s <file.c>\n",argv[0]);
return 1;
}
FILE *f = fopen(argv[1],"r");
if (f == NULL) {
printf("Can't find %s\n",argv[1]);
return 2;
}
int c = Fgetc(f);
int level = 0;
int parenlevel = 0;
int inFunction = 0;
while (c != EOF) {
// Note that each of the switches must advance the
// character read so that we avoid an infinite loop.
switch (c) {
case '"':
c = ParseString(f);
break;
case '/':
c = Fgetc(f);
if (c == '*')
c = ParseComment(f);
else if (c == '/')
c = ParseCppComment(f);
break;
case '\'':
c = ParseQuotedChar(f);
break;
case '{':
level++;
c = Fgetc(f);
break;
case '}':
if (level == 1 && inFunction) {
printf(" %d\n",line);
inFunction = 0;
}
if (level > 0)
level--;
c = Fgetc(f);
break;
case '(':
parenlevel++;
c = Fgetc(f);
break;
case ')':
if (parenlevel > 0)
parenlevel--;
c = Fgetc(f);
if ((parenlevel|level) == 0) {
c = SkipWhiteSpace(c,f);
if (c == '{') {
level++;
inFunction = 1;
c = PrintFunction(f);
}
}
break;
default:
if ((level | parenlevel) == 0 &&
IsIdentifier(c,1))
c = ReadId(c,f);
else c = Fgetc(f);
}
}
fclose(f);
return 0;
}
I have written this small parser to print out the functions defined in a
C file. This is an example of parsing in C, that I want to add to my
tutorial. Comments (and bug reports) are welcome.
-------------------------------------------------------------cut here
/* A simple scanner that will take a file of C source code and
print the names of all functions therein, in the following format:
"Function XXXX found line dddd .... ddddd"
Algorithm. It scans for a terminating parentheses and an immediately
following opening brace. Comments can appear between the closing
paren and the opening braces, but no other characters besides white
space. Functions must have the correct prototype, K & R syntax
is not supported.
*/
#include <stdio.h>
#define MAXID 1024 // Longest Identifier we support. Sorry
// Java guys...
static char IdBuffer[MAXID]; // Buffer for remembering the function name
static int line = 1; // We start at line 1
// This function reads a character and if
// it is \n it bumps the line counter
static int Fgetc(FILE *f)
{
int c = fgetc(f);
if (c == '\n')
line++;
return c;
}
// Return 1 if the character is a legal C identifier
// character, zero if not. The parameter "start"
// means if an identifier START character
// (numbers) is desired.
static int IsIdentifier(int c,int start)
{
if (c >= 'a' && c <= 'z')
return 1;
if (c >= 'A' && c <= 'Z')
return 1;
if (start == 0 && c >= '0' && c <= '9')
return 1;
if (c == '_')
return 1;
return 0;
}
// Just prints the function name
static int PrintFunction(FILE *f)
{
printf("Function %s found line %d ...",IdBuffer,line);
return Fgetc(f);
}
// Reads a global identifier into our name buffer
static int ReadId(char c,FILE *f)
{
int i = 1;
IdBuffer[0] = c;
while (i < MAXID-1) {
c = Fgetc(f);
if (c != EOF) {
if (IsIdentifier(c,0))
IdBuffer[i++] = c;
else break;
}
else break;
}
IdBuffer = 0;
return c;
}
static int ParseString(FILE *f) // Skips strings
{
int c = Fgetc(f);
while (c != EOF && c != '"') {
if (c == '\\')
c = Fgetc(f);
if (c != EOF)
c = Fgetc(f);
}
if (c == '"')
c = Fgetc(f);
return c;
}
static int ParseComment(FILE *f) // Skips comments
{
int c = Fgetc(f);
restart:
while (c != '*') {
c = Fgetc(f);
if (c == EOF)
return EOF;
}
c = Fgetc(f);
if (c == '/')
return Fgetc(f);
else goto restart;
}
static int ParseCppComment(FILE *f) // Skips // comments
{
int c = Fgetc(f);
while (c != EOF && c != '\n') {
if (c == '\\')
c = Fgetc(f);
if (c != EOF)
c = Fgetc(f);
}
if (c == '\n')
c = Fgetc(f);
return c;
}
// Skips white space and comments
static int SkipWhiteSpace(int c,FILE *f) {
if (c > ' ')
return c;
while (c <= ' ') {
c = Fgetc(f);
if (c == '/') {
c = Fgetc(f);
if (c == '*')
c = ParseComment(f);
else if (c == '/')
c = ParseCppComment(f);
}
}
return c;
}
// Skips chars between simple quotes
static int ParseQuotedChar(FILE *f)
{
int c = Fgetc(f);
while (c != EOF && c != '\'') {
if (c == '\\')
c = Fgetc(f);
if (c != EOF)
c = Fgetc(f);
}
if (c == '\'')
c = Fgetc(f);
return c;
}
int main(int argc,char *argv[])
{
if (argc == 1) {
printf("Usage: %s <file.c>\n",argv[0]);
return 1;
}
FILE *f = fopen(argv[1],"r");
if (f == NULL) {
printf("Can't find %s\n",argv[1]);
return 2;
}
int c = Fgetc(f);
int level = 0;
int parenlevel = 0;
int inFunction = 0;
while (c != EOF) {
// Note that each of the switches must advance the
// character read so that we avoid an infinite loop.
switch (c) {
case '"':
c = ParseString(f);
break;
case '/':
c = Fgetc(f);
if (c == '*')
c = ParseComment(f);
else if (c == '/')
c = ParseCppComment(f);
break;
case '\'':
c = ParseQuotedChar(f);
break;
case '{':
level++;
c = Fgetc(f);
break;
case '}':
if (level == 1 && inFunction) {
printf(" %d\n",line);
inFunction = 0;
}
if (level > 0)
level--;
c = Fgetc(f);
break;
case '(':
parenlevel++;
c = Fgetc(f);
break;
case ')':
if (parenlevel > 0)
parenlevel--;
c = Fgetc(f);
if ((parenlevel|level) == 0) {
c = SkipWhiteSpace(c,f);
if (c == '{') {
level++;
inFunction = 1;
c = PrintFunction(f);
}
}
break;
default:
if ((level | parenlevel) == 0 &&
IsIdentifier(c,1))
c = ReadId(c,f);
else c = Fgetc(f);
}
}
fclose(f);
return 0;
}