S
szr
I encountered this strange problem while making a graphical application
under Windows, and I was able to make a simple test program that
demonstrates the problem that appears to be occurring (if indeed it is a
problem and I'm not just missing something).
It runs under both Windows (using either Embarcadero's or Borland's C++
Builders, though I was unable to test under any version of Visual Studio
VC++ since it appears to lack any PCRE libraries) and g++ under Linux.
Begin Demonstration Code
----------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // strncpy.
#ifdef __GNUG__
#include <regex.h> // regcomp, regexec.
#else
#include <pcreposix.h> // regcomp, regexec.
// Since Embarcadero and Borland don't seem to define this.
#define REG_EXTENDED 1
#endif
// Print a human-readable description of a failed regex call to stdout.
//   title  - label shown in brackets ahead of the message
//   re     - the regex object the failing call operated on
//   status - the non-zero code that call returned
void error(const char* title, regex_t &re, int status) {
    char message[128];
    // Let regerror() translate the status code into text, bounded by the
    // size of the local buffer.
    regerror(status, &re, message, sizeof message);
    printf( "[%s] Error: %s\n", title, message );
}
// Release the resources held by the demo: the match array (when one was
// allocated with new[]) and the compiled regex.
//   re     - regex object to hand to regfree()
//   pmatch - array obtained from new[], or NULL when none was allocated
void clean_up( regex_t &re, regmatch_t* pmatch = NULL ) {
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] pmatch;
    regfree( &re );
}
// Demonstration driver: compiles a three-group extended regex, runs it
// against a fixed sample string, and prints the start/end offsets and the
// text of the whole match and of every capture group.
// Returns 0 on success; exits with status -1 when regcomp() or regexec()
// reports an error.
int main() { // int argc, char* argv[]
    char str[128] = "$$$ ABC123def ###";
    char pat[128] = "([A-Za-z]+)([0-9]+)([A-Za-z]+)";
    regex_t re;
    int status;
    if ((status = regcomp(&re, pat, REG_EXTENDED)) != 0) {
        error("regcomp", re, status);
        // No regfree() here: after a failed regcomp() the object does not
        // hold a compiled pattern, so there is nothing valid to release.
        exit( -1 );
    }
    // !!! Changing "re.re_nsub + 3" to just "re.re_nsub" in the next
    // !!! two lines seems to exhibit the problem. I don't understand
    // !!! why it only works right with 3 additional slots allocated.
    // ( See OUTPUT section below for comparisons. )
    // NOTE(review): per the POSIX contract, re.re_nsub + 1 entries (entry 0
    // for the whole match plus one per group) should be sufficient; the
    // loop below reads exactly that many.
    regmatch_t *pmatch = new regmatch_t[ re.re_nsub + 3 ];
    if ((status = regexec(&re, str, re.re_nsub + 3, pmatch, 0)) != 0) {
        error("regexec", re, status);
        clean_up( re, pmatch );
        exit( -1 );
    }
    for (unsigned int i = 0; i <= re.re_nsub; i++) {
        // Each group has its own entry; index the array instead of
        // applying member access to the pointer itself.
        const regmatch_t &m = pmatch[i];
        char cap[128] = ""; // Capture Group (zero-filled, so always terminated).
        // A group that did not take part in the match has rm_so == -1;
        // copying from it would read before the start of str with a huge
        // length, so leave cap empty in that case.
        if (m.rm_so >= 0 && m.rm_eo >= m.rm_so) {
            size_t len = (size_t)(m.rm_eo - m.rm_so);
            // Never write the last byte of cap, so the terminator survives.
            if (len > sizeof cap - 1) {
                len = sizeof cap - 1;
            }
            strncpy(cap, str + m.rm_so, len);
        }
        // regoff_t and the unsigned index are cast so they match %d exactly.
        printf(
            "[%2d] so[%2d] eo[%2d] m[%s]\n",
            (int)i,
            (int)m.rm_so,
            (int)m.rm_eo,
            cap
        );
    }
    // Clean up.
    clean_up( re, pmatch );
    getchar(); // Pause (so cmd.exe doesn't close under Windows.)
    return 0;
}
/*
OUTPUT COMPARISON:
// Using: re.re_nsub
// Under linux, sometimes I get
// *** glibc detected *** ./test: malloc():
// memory corruption: 0x0804a960 ***
// instead of the below.
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[-1] eo[ 0] m[]
[ 3] so[ 0] eo[ 0] m[]
// Using: re.re_nsub + 1
// Sometimes I get the same crash as noted above.
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[-1] eo[ 0] m[]
[ 3] so[-1] eo[-1] m[]
// Using: re.re_nsub + 2
// Sometimes I get the same crash as noted above.
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[ 3] eo[ 6] m[123]
[ 3] so[-1] eo[ 3] m[]
// Using: re.re_nsub + 3
// Only with 3 extra indices allocated does it work right. But why?
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[ 3] eo[ 6] m[123]
[ 3] so[ 6] eo[ 9] m[def]
*/
----------------------------------------------------------------------
End Demonstration Code
As can be seen from the output snapshots above, if I change
regmatch_t *pmatch = new regmatch_t[ re.re_nsub + 3 ];
if ((status = regexec(&re, str, re.re_nsub + 3, pmatch, 0)) != 0) {
to
regmatch_t *pmatch = new regmatch_t[ re.re_nsub ];
if ((status = regexec(&re, str, re.re_nsub, pmatch, 0)) != 0) {
Then it doesn't display the matches (capture groups) correctly and even
sometimes crashes outright with a
*** glibc detected *** ./test: malloc(): memory corruption: ...
error from glibc, under Linux. Otherwise I get the first output
comparison above.
Thanks for any insight on this. I admit I'm not used to working with
regex in C or C++, so maybe I'm missing something, but I can't seem to
find anything in any documentation about a need for (at least) 3 extra
slots to be allocated.
under Windows, and I was able to make a simple test program that
demonstrates the problem that appears to be occurring (if indeed it is a
problem and I'm not just missing something).
It runs under both Windows (using either Embarcadero's or Borland's C++
Builders, though I was unable to test under any version of Visual Studio
VC++ since it appears to lack any PCRE libraries) and g++ under Linux.
Begin Demonstration Code
----------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // strncpy.
#ifdef __GNUG__
#include <regex.h> // regcomp, regexec.
#else
#include <pcreposix.h> // regcomp, regexec.
// Since Embarcadero and Borland don't seem to define this.
#define REG_EXTENDED 1
#endif
// Print a human-readable description of a failed regex call to stdout.
//   title  - label shown in brackets ahead of the message
//   re     - the regex object the failing call operated on
//   status - the non-zero code that call returned
void error(const char* title, regex_t &re, int status) {
    char message[128];
    // Let regerror() translate the status code into text, bounded by the
    // size of the local buffer.
    regerror(status, &re, message, sizeof message);
    printf( "[%s] Error: %s\n", title, message );
}
// Release the resources held by the demo: the match array (when one was
// allocated with new[]) and the compiled regex.
//   re     - regex object to hand to regfree()
//   pmatch - array obtained from new[], or NULL when none was allocated
void clean_up( regex_t &re, regmatch_t* pmatch = NULL ) {
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete[] pmatch;
    regfree( &re );
}
// Demonstration driver: compiles a three-group extended regex, runs it
// against a fixed sample string, and prints the start/end offsets and the
// text of the whole match and of every capture group.
// Returns 0 on success; exits with status -1 when regcomp() or regexec()
// reports an error.
int main() { // int argc, char* argv[]
    char str[128] = "$$$ ABC123def ###";
    char pat[128] = "([A-Za-z]+)([0-9]+)([A-Za-z]+)";
    regex_t re;
    int status;
    if ((status = regcomp(&re, pat, REG_EXTENDED)) != 0) {
        error("regcomp", re, status);
        // No regfree() here: after a failed regcomp() the object does not
        // hold a compiled pattern, so there is nothing valid to release.
        exit( -1 );
    }
    // !!! Changing "re.re_nsub + 3" to just "re.re_nsub" in the next
    // !!! two lines seems to exhibit the problem. I don't understand
    // !!! why it only works right with 3 additional slots allocated.
    // ( See OUTPUT section below for comparisons. )
    // NOTE(review): per the POSIX contract, re.re_nsub + 1 entries (entry 0
    // for the whole match plus one per group) should be sufficient; the
    // loop below reads exactly that many.
    regmatch_t *pmatch = new regmatch_t[ re.re_nsub + 3 ];
    if ((status = regexec(&re, str, re.re_nsub + 3, pmatch, 0)) != 0) {
        error("regexec", re, status);
        clean_up( re, pmatch );
        exit( -1 );
    }
    for (unsigned int i = 0; i <= re.re_nsub; i++) {
        // Each group has its own entry; index the array instead of
        // applying member access to the pointer itself.
        const regmatch_t &m = pmatch[i];
        char cap[128] = ""; // Capture Group (zero-filled, so always terminated).
        // A group that did not take part in the match has rm_so == -1;
        // copying from it would read before the start of str with a huge
        // length, so leave cap empty in that case.
        if (m.rm_so >= 0 && m.rm_eo >= m.rm_so) {
            size_t len = (size_t)(m.rm_eo - m.rm_so);
            // Never write the last byte of cap, so the terminator survives.
            if (len > sizeof cap - 1) {
                len = sizeof cap - 1;
            }
            strncpy(cap, str + m.rm_so, len);
        }
        // regoff_t and the unsigned index are cast so they match %d exactly.
        printf(
            "[%2d] so[%2d] eo[%2d] m[%s]\n",
            (int)i,
            (int)m.rm_so,
            (int)m.rm_eo,
            cap
        );
    }
    // Clean up.
    clean_up( re, pmatch );
    getchar(); // Pause (so cmd.exe doesn't close under Windows.)
    return 0;
}
/*
OUTPUT COMPARISON:
// Using: re.re_nsub
// Under linux, sometimes I get
// *** glibc detected *** ./test: malloc():
// memory corruption: 0x0804a960 ***
// instead of the below.
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[-1] eo[ 0] m[]
[ 3] so[ 0] eo[ 0] m[]
// Using: re.re_nsub + 1
// Sometimes I get the same crash as noted above.
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[-1] eo[ 0] m[]
[ 3] so[-1] eo[-1] m[]
// Using: re.re_nsub + 2
// Sometimes I get the same crash as noted above.
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[ 3] eo[ 6] m[123]
[ 3] so[-1] eo[ 3] m[]
// Using: re.re_nsub + 3
// Only with 3 extra indices allocated does it work right. But why?
[ 0] so[ 0] eo[ 9] m[ABC123def]
[ 1] so[ 0] eo[ 3] m[ABC]
[ 2] so[ 3] eo[ 6] m[123]
[ 3] so[ 6] eo[ 9] m[def]
*/
----------------------------------------------------------------------
End Demonstration Code
As can be seen from the output snapshots above, if I change
regmatch_t *pmatch = new regmatch_t[ re.re_nsub + 3 ];
if ((status = regexec(&re, str, re.re_nsub + 3, pmatch, 0)) != 0) {
to
regmatch_t *pmatch = new regmatch_t[ re.re_nsub ];
if ((status = regexec(&re, str, re.re_nsub, pmatch, 0)) != 0) {
Then it doesn't display the matches (capture groups) correctly and even
sometimes crashes outright with a
*** glibc detected *** ./test: malloc(): memory corruption: ...
error from glibc, under Linux. Otherwise I get the first output
comparison above.
Thanks for any insight on this. I admit I'm not used to working with
regex in C or C++, so maybe I'm missing something, but I can't seem to
find anything in any documentation about a need for (at least) 3 extra
slots to be allocated.