regex.h

G

gert

based on http://souptonuts.sourceforge.net/code/regexp.c.html

#include <stdio.h>
#include <string.h>
#include <regex.h>

/*
 * split: compiles the fixed extended regex "select", runs it against input
 * with room for 100 regmatch_t entries, and reports through output.
 * Always returns 0.
 *
 * output  caller-supplied array of char pointers; output[0] is pointed at a
 *         string literal when regcomp or regexec returns non-zero.
 * input   string handed to regexec.
 *
 * NOTE(review): as posted this does not compile. pmatch is an array but is
 * used as pmatch.rm_so / pmatch.rm_eo, and output is assigned as a whole.
 * The subscripts were presumably [i] and lost when the code was posted;
 * confirm against the original file.
 */
int split(char * output[100], char * input){
regex_t preg;
regmatch_t pmatch[100];
size_t i;
/* NOTE(review): input is a char pointer, so sizeof(input) is the size of a
 * pointer, not the length of the string it points to. */
char inputsub[sizeof(input)];

if(regcomp (&preg, "select", REG_EXTENDED)) output[0]="Invalid
expression";
else {
if(regexec (&preg, input, 100, pmatch, 0)) output[0]="No
match"; else {
/* Visits the whole match (index 0) and each parenthesised sub-expression. */
for (i = 0; i <= preg.re_nsub; i++){
/* Copies rm_eo - rm_so bytes starting at the match offset. strncpy does not
 * append a terminator when the source is longer than the byte count. */
strncpy (inputsub, input + pmatch.rm_so, pmatch.rm_eo -
pmatch.rm_so);
//inputsub[pmatch.rm_eo - pmatch.rm_so] = '\0'; //
<-------????????
/* NOTE(review): inputsub is a local array of this function, so a pointer to
 * it is no longer usable once split has returned. */
output=inputsub;
}
}
}
/* NOTE(review): this is reached even when regcomp reported failure. */
regfree (&preg);
printf (""); //<----------????????????????
return 0;
}

/*
 * xml: calls split on input, then prints a Content-Type header line, an
 * empty line, and output[0] wrapped in <xml> tags. Always returns 0.
 *
 * input  string passed straight through to split.
 */
int xml(char * input){
/* Only output[0] is read back after the call. */
char * output[100];
split(output,input);
printf("Content-Type: text/xml; charset=utf-8"
"\r\n"
"\r\n"
"<xml>%s</xml>"
"\n",output[0]);

return 0;
}

is this line really necessary ?

inputsub[pmatch.rm_eo - pmatch.rm_so] = '\0';

and why do i have to do printf ("") ? if i dont do printf("") the
second printf doesnt have a output[0] value ?

printf("Content-Type: text/xml; charset=utf-8"
"\r\n"
"\r\n"
"<xml>%s</xml>"
"\n",output[0]);
 
J

Jens Thoms Toerring

gert said:
#include <stdio.h>
#include <string.h>
#include <regex.h>
int split(char * output[100], char * input){
regex_t preg;
regmatch_t pmatch[100];
size_t i;
char inputsub[sizeof(input)];

This looks very wrong. 'input' is a char pointer so its sizeof()
is going to be a very small number. The resulting array is rather
likely to be much too short for holding what you are going to copy
to it. I guess you somehow want to make the string as long as
the amount of memory 'input' points to. But you can't do that
since there's no information about how much that is - a pointer does
not contain any information about the amount of memory it
points to. Perhaps you meant

char inputsub[ strlen( input ) + 1 ];

But keep in mind that this is only possible in C99; in C89 there
were no arrays whose length could be set at runtime (VLAs), so you
would have to use malloc() to obtain as much memory as you need
here.
if(regcomp (&preg, "select", REG_EXTENDED)) output[0]="Invalid
expression";
else {
if(regexec (&preg, input, 100, pmatch, 0)) output[0]="No
match"; else {
for (i = 0; i <= preg.re_nsub; i++){
strncpy (inputsub, input + pmatch.rm_so, pmatch.rm_eo -
pmatch.rm_so);


Here you copy only a certain number of characters, and if this is
shorter than the source string the destination string isn't ended
with a '\0' (strncpy() does *not* add one by itself), so the desti-
nation string isn't a string at all.
//inputsub[pmatch.rm_eo - pmatch.rm_so] = '\0'; //
<-------????????


And that line, if uncommented, adds a '\0' to the destination
string, thus making it a real string.
output=inputsub;


And here things go badly wrong. 'inputsub' is a local array and
goes out of scope the moment you leave this function, so whatever
the elements of 'output' are pointing to then is memory you can't
use anymore. Moreover, you assign the same address to all the ele-
ments of 'output' - copying a pointer isn't the same as copying a
string (and for that you would need additional memory).
}
}
}
regfree (&preg);
printf (""); //<----------????????????????

This is actually completely useless.
return 0;
}
int xml(char * input){
char * output[100];
split(output,input);
printf("Content-Type: text/xml; charset=utf-8"
"\r\n"
"\r\n"
"<xml>%s</xml>"
"\n",output[0]);
return 0;
}
and why do i have to do printf ("") ? if i dont do printf("") the
second printf doesnt have a output[0] value ?

Probably because you have some bad mistakes in your program and
for some strange reason putting in the printf("") makes it look
as if they weren't there. If you invoke undefined behaviour by
e.g. writing past the end of an array or by using pointers to
memory that already has gone out of scope basically everything
can happen - it may even look as if it would work flawlessly
or may look as if it works if you add some otherwise completely
useless code.
Regards, Jens
 
K

Keith Thompson

gert said:
int split(char * output[100], char * input){
[...]

I'll just make one point about this particular line; I might come back
with more comments later.

Parameter declarations that look like array declarations are tricky.
In this case:

char *output[100]

the "100" will be silently ignored by the compiler. What looks like
an array parameter declaration is really a pointer parameter
declaration (it's not converted as most array expressions are, it's
actually translated at compile time). It would be clearer to write
it as:

int split(char *output[], char *input)

or even what I'd prefer:

int split(char **output, char *input);

See section 6 of the comp.lang.c FAQ, <http://www.c-faq.com/>.
 
G

gert

Second attempt

/*
 * split (second attempt): compiles the fixed extended regex "select", runs
 * it against input, and writes through output. Always returns 0.
 *
 * output  declared as char **; on regcomp or regexec failure output[0] is
 *         pointed at a string literal.
 * input   string handed to regexec.
 *
 * NOTE(review): as posted this does not compile. match is an array but is
 * used as match.rm_so / match.rm_eo; the subscripts were presumably [i] and
 * lost when the code was posted. Confirm against the original file.
 */
int split(char ** output, char * input){
regex_t reg;
regmatch_t match[100];
size_t i;

if(regcomp (&reg, "select", REG_EXTENDED)) output[0]="Invalid
expression";
else {
if(regexec (&reg, input, 100, match, 0)) output[0]="No match";
else {
/* Visits the whole match (index 0) and each parenthesised sub-expression. */
for (i = 0; i <= reg.re_nsub; i++){
/* NOTE(review): output is passed to strncpy without a row subscript, and
 * the terminator below is stored into the pointer table itself rather than
 * into a character buffer. Presumably output[i] was intended in both places. */
strncpy (output, input + match.rm_so, match.rm_eo -
match.rm_so);
output[match.rm_eo - match.rm_so] = '\0';
}
}
}
/* NOTE(review): this is reached even when regcomp reported failure. */
regfree (&reg);
return 0;
}

/*
 * xml (second attempt): calls split on input, then prints a Content-Type
 * header line, an empty line, and output[0] wrapped in <xml> tags.
 * Always returns 0.
 *
 * input  string passed to split; its length also sizes each row of output.
 */
int xml(char * input){
/* 100 rows, each strlen(input) + 1 bytes (a variable-length array).
 * NOTE(review): split is declared to take char **, which is a different
 * type from this two-dimensional array. */
char output[100][strlen( input ) + 1];
split(output,input);
printf("Content-Type: text/xml; charset=utf-8"
"\r\n"
"\r\n"
"<xml>%s</xml>"
"\n",output[0]);
return 0;
}

gert@gert:~$ Desktop/svn/db/c/make.sh
In file included from /home/gert/Desktop/svn/db/c/post.c:4:
/home/gert/Desktop/svn/db/c/xml.c: In function 'xml':
/home/gert/Desktop/svn/db/c/xml.c:8: warning: passing argument 1 of
'split' from incompatible pointer type
Content-Type: text/xml; charset=utf-8

<xml>o?</xml>
gert@gert:~$

I did something wrong again
 
G

gert

#include <string.h>
#include <regex.h>

/* Dimensions of the caller-provided result table used by split(). */
enum { SPLIT_ROWS = 100, SPLIT_ROW_LEN = 100 };

/*
 * Copy len bytes of src into one result row, truncating to
 * SPLIT_ROW_LEN - 1 bytes, and always NUL-terminate the row.
 */
static void split_store(char *dst, const char *src, size_t len){
    if (len > SPLIT_ROW_LEN - 1) {
        len = SPLIT_ROW_LEN - 1;
    }
    memcpy(dst, src, len);
    dst[len] = '\0';
}

/*
 * split - search input for the fixed extended regex "select" and copy the
 * overall match plus any sub-expression matches into output.
 *
 * output: caller-provided table of 100 rows of 100 bytes. Row i receives
 *         match i (row 0 is the whole match) as a NUL-terminated string,
 *         truncated to 99 characters if needed. A sub-expression that did
 *         not take part in the match yields an empty string. On failure
 *         row 0 receives "Invalid expression" or "No match".
 * input:  NUL-terminated string to search; NULL is reported as "No match".
 *
 * Returns 0 in every case; callers inspect output[0].
 */
int split(char output[100][100], char * input){
    static const char bad_expr[] = "Invalid expression";
    static const char no_match[] = "No match";
    regex_t reg;
    regmatch_t match[SPLIT_ROWS];
    size_t i;

    if (input == NULL) {
        split_store(output[0], no_match, strlen(no_match));
        return 0;
    }

    if (regcomp(&reg, "select", REG_EXTENDED) != 0) {
        /* reg holds nothing to release after a failed regcomp, so return
         * without calling regfree. */
        split_store(output[0], bad_expr, strlen(bad_expr));
        return 0;
    }

    if (regexec(&reg, input, SPLIT_ROWS, match, 0) != 0) {
        split_store(output[0], no_match, strlen(no_match));
    } else {
        /* Entry 0 is the whole match; entries 1..re_nsub are the groups.
         * Never walk past the table even if the pattern has more groups. */
        for (i = 0; i <= reg.re_nsub && i < SPLIT_ROWS; i++) {
            if (match[i].rm_so < 0) {
                /* Group did not participate in the match. */
                output[i][0] = '\0';
                continue;
            }
            split_store(output[i], input + match[i].rm_so,
                        (size_t)(match[i].rm_eo - match[i].rm_so));
        }
    }

    regfree(&reg);
    return 0;
}

/*
 * xml - run split() on input and print the first result as a tiny XML
 * document, preceded by a text/xml Content-Type header and an empty line.
 *
 * input: string handed unchanged to split().
 *
 * Always returns 0.
 */
int xml(char * input){
    char matches[100][100];

    split(matches, input);

    /* Header line, blank separator line, then the body. */
    printf("Content-Type: text/xml; charset=utf-8\r\n"
           "\r\n"
           "<xml>%s</xml>\n",
           matches[0]);

    return 0;
}

this works but i want to make char output[100][100] dynamic so i can
have as many matches as needed ?
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,770
Messages
2,569,583
Members
45,072
Latest member
trafficcone

Latest Threads

Top