P
pozz
I want to write a very simple (at least, I thought it was very simple)
parser for a string. The string has the following format:
- a string ID that matches one entry of a known list, compared
case-insensitively, with no whitespace before it
- one or more whitespace characters (spaces, tabs, newlines...)
- the word "door" (without quotes), case-insensitive
- zero or more whitespace characters
- a number in the range 0..10
- no other characters after the number
Of course, I want to write a robust parser, one that avoids memory
leaks, buffer overruns, segmentation faults and so on.
What do you think about the following code? Is there a better way? Did
I forget something? Is it portable? (I am not sure about strcasecmp()
and strncasecmp().)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* Keyword that must follow the command ID (matched case-insensitively). */
const char DOOR[] = "DOOR";

/* Known command IDs; parse() reports the index of the matching entry. */
const char *ids[] = {
    "OPEN",
    "CLOSE",
    "LOCK"
};
#define ID_NUM (sizeof(ids) / sizeof(ids[0]))

/* Inclusive range of accepted door numbers. */
const int DOOR_MIN = 0;
const int DOOR_MAX = 10;

/* Test vectors: an input string plus the results parse() must produce. */
struct {
    const char *s;
    int idx; /* expected index into ids[], or -1 if the parsing shouldn't pass */
    int num; /* expected door number (ignored when idx is -1) */
} test[] = {
    { "Open Door 1",           0,  1 },
    { "close DOor 3",          1,  3 },
    { "LoCk \t doOR7",         2,  7 },
    { " Open Door 1",         -1,  0 }, /* spaces at the beginning */
    { "close window 1",       -1,  0 }, /* window instead of door */
    { "lockk door 1",         -1,  0 }, /* lockk instead of lock */
    { "open doorr 1",         -1,  0 }, /* doorr instead of door */
    { "lock door 34",         -1,  0 }, /* number outside the range 0..10 */
    { "open door main",       -1,  0 }, /* number absent */
    { "lock door",            -1,  0 }, /* number absent */
    { "close door 1a",        -1,  0 }, /* the string continues after the number */
    { "open door 0",           0,  0 }, /* lower bound of the range */
    { "lock door 10",          2, 10 }, /* upper bound of the range */
    { "opendoor 1",           -1,  0 }, /* no whitespace between ID and door */
    { "open door -0",         -1,  0 }, /* a sign is not part of the number */
    { "open door 4294967297", -1,  0 }, /* must not wrap around into the range */
    { "",                     -1,  0 }  /* empty input */
};
#define TEST_NUM (sizeof(test) / sizeof(test[0]))

/*
 * Case-insensitively match the NUL-terminated keyword kw at the start of s.
 *
 * Returns the length of kw on a match and 0 otherwise (an empty kw never
 * matches). The comparison stops at the first mismatch, so it never reads
 * past the terminating NUL of s. Written in ISO C so that the parser does
 * not depend on the POSIX-only strncasecmp().
 */
static size_t
match_keyword(const char *s, const char *kw)
{
    size_t n = 0;

    while (kw[n] != '\0') {
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (tolower((unsigned char)s[n]) != tolower((unsigned char)kw[n])) {
            return 0;
        }
        n++;
    }
    return n;
}

/*
 * Parse a command of the form  <ID> <ws>+ "door" <ws>* <number>.
 *
 *   s       NUL-terminated input string.
 *   id_idx  receives the index into ids[] of the matched ID on success,
 *           and -1 on failure.
 *   num     receives the door number on success; left untouched on failure.
 *
 * The ID and the word "door" are matched case-insensitively. The ID must
 * start at the first character and be followed by at least one whitespace
 * character. The number must consist of decimal digits only (no sign), lie
 * in DOOR_MIN..DOOR_MAX, and be the last thing in the string.
 *
 * Returns 1 on success and -1 on any failure, including NULL arguments.
 */
int
parse(const char *s, int *id_idx, int *num)
{
    size_t i;
    size_t len = 0;
    char *end;
    long value;

    if (s == NULL || id_idx == NULL || num == NULL) {
        return -1;
    }
    *id_idx = -1;

    /* The ID must be followed by whitespace, so "lockk" or "opendoor" fail here. */
    for (i = 0; i < ID_NUM; i++) {
        len = match_keyword(s, ids[i]);
        if (len > 0 && isspace((unsigned char)s[len])) {
            break;
        }
    }
    if (i == ID_NUM) {
        return -1;
    }
    s += len;
    while (isspace((unsigned char)*s)) {
        s++;
    }

    len = match_keyword(s, DOOR);
    if (len == 0) {
        return -1;
    }
    s += len;
    while (isspace((unsigned char)*s)) {
        s++;
    }

    /* Insist on a digit so that strtol() cannot consume a sign. */
    if (!isdigit((unsigned char)*s)) {
        return -1;
    }
    value = strtol(s, &end, 10);
    /*
     * Range-check the long before narrowing to int: an out-of-range input
     * saturates at LONG_MAX and is rejected instead of wrapping around.
     */
    if (*end != '\0' || value < DOOR_MIN || value > DOOR_MAX) {
        return -1;
    }

    *id_idx = (int)i;
    *num = (int)value;
    return 1;
}
/*
 * Test driver: runs parse() on every entry of test[] and prints one
 * "Test <n> ok" or "Test <n> ERR" line per case.
 *
 * A case passes when parse() fails and the entry expects failure
 * (idx == -1), or when parse() succeeds and both the ID index and the
 * number equal the expected values.
 *
 * Returns EXIT_SUCCESS when every case passes, EXIT_FAILURE otherwise.
 */
int
main(void)
{
    int i;
    int failures = 0;

    /* TEST_NUM is a size_t; cast it so the comparison is not signed/unsigned. */
    for (i = 0; i < (int)TEST_NUM; i++) {
        int ret;
        int pass;
        int num = 0;
        int id_idx = -1;

        printf("Test %2d ", i);
        ret = parse(test[i].s, &id_idx, &num);
        if (ret < 0) {
            pass = (test[i].idx == -1);
        } else {
            pass = (test[i].idx == id_idx) && (test[i].num == num);
        }
        fputs(pass ? "ok\n" : "ERR\n", stdout);
        if (!pass) {
            failures++;
        }
    }
    return (failures == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
parser of a string. This string has the following format:
- a string ID that matches one entry of a known list, compared
case-insensitively, with no whitespace before it
- one or more whitespace characters (spaces, tabs, newlines...)
- the word "door" (without quotes), case-insensitive
- zero or more whitespace characters
- a number in the range 0..10
- no other characters after the number
Of course, I want to write a robust parser, one that avoids memory
leaks, buffer overruns, segmentation faults and so on.
What do you think about the following code? Is there a better way? Did
I forget something? Is it portable? (I am not sure about strcasecmp()
and strncasecmp().)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* Keyword that must follow the command ID (matched case-insensitively). */
const char DOOR[] = "DOOR";

/* Known command IDs; parse() reports the index of the matching entry. */
const char *ids[] = {
    "OPEN",
    "CLOSE",
    "LOCK"
};
#define ID_NUM (sizeof(ids) / sizeof(ids[0]))

/* Inclusive range of accepted door numbers. */
const int DOOR_MIN = 0;
const int DOOR_MAX = 10;

/* Test vectors: an input string plus the results parse() must produce. */
struct {
    const char *s;
    int idx; /* expected index into ids[], or -1 if the parsing shouldn't pass */
    int num; /* expected door number (ignored when idx is -1) */
} test[] = {
    { "Open Door 1",           0,  1 },
    { "close DOor 3",          1,  3 },
    { "LoCk \t doOR7",         2,  7 },
    { " Open Door 1",         -1,  0 }, /* spaces at the beginning */
    { "close window 1",       -1,  0 }, /* window instead of door */
    { "lockk door 1",         -1,  0 }, /* lockk instead of lock */
    { "open doorr 1",         -1,  0 }, /* doorr instead of door */
    { "lock door 34",         -1,  0 }, /* number outside the range 0..10 */
    { "open door main",       -1,  0 }, /* number absent */
    { "lock door",            -1,  0 }, /* number absent */
    { "close door 1a",        -1,  0 }, /* the string continues after the number */
    { "open door 0",           0,  0 }, /* lower bound of the range */
    { "lock door 10",          2, 10 }, /* upper bound of the range */
    { "opendoor 1",           -1,  0 }, /* no whitespace between ID and door */
    { "open door -0",         -1,  0 }, /* a sign is not part of the number */
    { "open door 4294967297", -1,  0 }, /* must not wrap around into the range */
    { "",                     -1,  0 }  /* empty input */
};
#define TEST_NUM (sizeof(test) / sizeof(test[0]))

/*
 * Case-insensitively match the NUL-terminated keyword kw at the start of s.
 *
 * Returns the length of kw on a match and 0 otherwise (an empty kw never
 * matches). The comparison stops at the first mismatch, so it never reads
 * past the terminating NUL of s. Written in ISO C so that the parser does
 * not depend on the POSIX-only strncasecmp().
 */
static size_t
match_keyword(const char *s, const char *kw)
{
    size_t n = 0;

    while (kw[n] != '\0') {
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (tolower((unsigned char)s[n]) != tolower((unsigned char)kw[n])) {
            return 0;
        }
        n++;
    }
    return n;
}

/*
 * Parse a command of the form  <ID> <ws>+ "door" <ws>* <number>.
 *
 *   s       NUL-terminated input string.
 *   id_idx  receives the index into ids[] of the matched ID on success,
 *           and -1 on failure.
 *   num     receives the door number on success; left untouched on failure.
 *
 * The ID and the word "door" are matched case-insensitively. The ID must
 * start at the first character and be followed by at least one whitespace
 * character. The number must consist of decimal digits only (no sign), lie
 * in DOOR_MIN..DOOR_MAX, and be the last thing in the string.
 *
 * Returns 1 on success and -1 on any failure, including NULL arguments.
 */
int
parse(const char *s, int *id_idx, int *num)
{
    size_t i;
    size_t len = 0;
    char *end;
    long value;

    if (s == NULL || id_idx == NULL || num == NULL) {
        return -1;
    }
    *id_idx = -1;

    /* The ID must be followed by whitespace, so "lockk" or "opendoor" fail here. */
    for (i = 0; i < ID_NUM; i++) {
        len = match_keyword(s, ids[i]);
        if (len > 0 && isspace((unsigned char)s[len])) {
            break;
        }
    }
    if (i == ID_NUM) {
        return -1;
    }
    s += len;
    while (isspace((unsigned char)*s)) {
        s++;
    }

    len = match_keyword(s, DOOR);
    if (len == 0) {
        return -1;
    }
    s += len;
    while (isspace((unsigned char)*s)) {
        s++;
    }

    /* Insist on a digit so that strtol() cannot consume a sign. */
    if (!isdigit((unsigned char)*s)) {
        return -1;
    }
    value = strtol(s, &end, 10);
    /*
     * Range-check the long before narrowing to int: an out-of-range input
     * saturates at LONG_MAX and is rejected instead of wrapping around.
     */
    if (*end != '\0' || value < DOOR_MIN || value > DOOR_MAX) {
        return -1;
    }

    *id_idx = (int)i;
    *num = (int)value;
    return 1;
}
/*
 * Test driver: runs parse() on every entry of test[] and prints one
 * "Test <n> ok" or "Test <n> ERR" line per case.
 *
 * A case passes when parse() fails and the entry expects failure
 * (idx == -1), or when parse() succeeds and both the ID index and the
 * number equal the expected values.
 *
 * Returns EXIT_SUCCESS when every case passes, EXIT_FAILURE otherwise.
 */
int
main(void)
{
    int i;
    int failures = 0;

    /* TEST_NUM is a size_t; cast it so the comparison is not signed/unsigned. */
    for (i = 0; i < (int)TEST_NUM; i++) {
        int ret;
        int pass;
        int num = 0;
        int id_idx = -1;

        printf("Test %2d ", i);
        ret = parse(test[i].s, &id_idx, &num);
        if (ret < 0) {
            pass = (test[i].idx == -1);
        } else {
            pass = (test[i].idx == id_idx) && (test[i].num == num);
        }
        fputs(pass ? "ok\n" : "ERR\n", stdout);
        if (!pass) {
            failures++;
        }
    }
    return (failures == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}