This will match '*' and '?' expressions. I've also seen a version
of this floating around that's actually a little smaller, but it used
recursion, so this should be a little faster.
Here's a recursive version of the regex-style pattern matcher,
translated from some Pascal source (I forget where I found it). It should be
very easy to remove the regex functionality, at which point you'll
have a regular old DOS-style '*'/'?' wildcard matcher.
Bugfixes welcome.
-Arthur
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include "RegEx.h" /* just the corresponding header file */
/*
* A relatively inefficient and simple version of regular expressions.
* Recurses on the end of each text in order to determine whether it
* matches the given regex.
*
* Currently supports:
*
* . Any character
* [...] Any character in a set
* [^..] Any character NOT in a set
* X Plaintext character
* \ Backslash escape, inside and outside of sets
* * Repeat zero or more times
* + Repeat one or more times
* ? Repeat zero or one times
*
* matches_regex() returns 1 on success, 0 on failure, or -1 if given
* a malformed regular expression.
*
*/
/* Kinds of single-character matchers produced while parsing one regex atom. */
#define RE_ANY 1    /* '.'     : any character                      */
#define RE_SET 2    /* [...]   : any character marked in matchset   */
#define RE_NOTSET 3 /* [^...]  : any character NOT marked           */
#define RE_ONE 4    /* literal : the one character marked           */

static int matches_single(int ch, int type, char matchset[UCHAR_MAX+1]);

/*
 * Mark `ch` as a member of `matchset`. When `match_case` is 0 the upper-
 * and lower-case forms of `ch` are marked as well, which is how the
 * case-insensitive mode is implemented.
 *
 * `ch` is taken as unsigned char so that it is always a valid index into
 * the table and a valid argument for toupper()/tolower().
 */
static void re_mark_char(char matchset[UCHAR_MAX+1], unsigned char ch,
                         int match_case)
{
    matchset[ch] = 1;
    if (match_case == 0) {
        matchset[(unsigned char) toupper(ch)] = 1;
        matchset[(unsigned char) tolower(ch)] = 1;
    }
}

/*
 * Recursive matcher: does the whole of `text` match the whole of `regex`?
 *
 * Each call parses one atom from the front of `regex` ('.', a [...] set,
 * or a literal / backslash-escaped character), looks at the optional
 * quantifier that follows it ('+', '*', '?'), and recurses on the rest of
 * the text and the rest of the pattern.
 *
 * text        NUL-terminated subject string.
 * regex       NUL-terminated pattern.
 * match_case  non-zero for case-sensitive matching, 0 to ignore case.
 *
 * Returns 1 on a match, 0 on no match, and -1 if a malformed pattern is
 * encountered (an unterminated set, or a trailing backslash). The pattern
 * is parsed lazily, so a malformed tail is only reported if matching
 * actually reaches it.
 *
 * Notes:
 *  - Repetitions are tried shortest-first.
 *  - A quantifier character with no preceding atom (for example a leading
 *    '*') falls into the literal case and matches itself.
 *  - "[]" is an empty set (matches nothing); "[^]" matches any character.
 */
static int m_regex(const char *text, const char *regex, int match_case)
{
    int to_match;
    char matchset[UCHAR_MAX+1] = {0};

    if (*regex == '\0') {
        return (*text == '\0');
    }

    /* Parse one atom; afterwards `regex` points just past it. */
    switch (*regex) {
    case '.':
        to_match = RE_ANY;
        ++regex;
        break;

    case '[':
        to_match = RE_SET;
        ++regex;                    /* skip '[' */
        if (*regex == '^') {
            to_match = RE_NOTSET;
            ++regex;                /* skip '^' */
        }
        /* `regex` already points at the first member: do not advance
         * again here, or the first character of the set is lost. */
        for (; *regex != ']'; ++regex) {
            if (*regex == '\\') {
                ++regex;            /* take the next character literally */
            }
            if (*regex == '\0') {
                return -1;          /* ran off the end: no closing ']' */
            }
            re_mark_char(matchset, (unsigned char) *regex, match_case);
        }
        ++regex;                    /* skip ']' */
        break;

    default:
        if (*regex == '\\') {
            ++regex;
            if (*regex == '\0') {
                return -1;          /* trailing backslash */
            }
        }
        to_match = RE_ONE;
        re_mark_char(matchset, (unsigned char) *regex, match_case);
        ++regex;
        break;
    }

    if (*regex == '+') {
        /* One or more: consume 1, 2, 3, ... matching characters and try
         * the rest of the pattern after each. The NUL check keeps '.' and
         * negated sets from running past the end of `text`. */
        size_t i;

        for (i = 0;
             text[i] != '\0' &&
             matches_single((unsigned char) text[i], to_match, matchset);
             ++i) {
            int tmp = m_regex(text + i + 1, regex + 1, match_case);
            if (tmp) {
                return tmp;
            }
        }
        return 0;
    }
    else if (*regex == '*') {
        /* Zero or more: first try consuming nothing, then as for '+'. */
        size_t i;
        int tmp = m_regex(text, regex + 1, match_case);

        if (tmp) {
            return tmp;
        }
        for (i = 0;
             text[i] != '\0' &&
             matches_single((unsigned char) text[i], to_match, matchset);
             ++i) {
            tmp = m_regex(text + i + 1, regex + 1, match_case);
            if (tmp) {
                return tmp;
            }
        }
        return 0;
    }
    else if (*regex == '?') {
        /* Zero or one: try skipping the atom, then try consuming it. */
        int tmp = m_regex(text, regex + 1, match_case);

        if (tmp) {
            return tmp;
        }
        if (*text != '\0' &&
            matches_single((unsigned char) *text, to_match, matchset)) {
            return m_regex(text + 1, regex + 1, match_case);
        }
        return 0;
    }

    /* No quantifier: the atom must match exactly one character. */
    if (*text == '\0') {
        return 0;
    }
    if (matches_single((unsigned char) *text, to_match, matchset)) {
        return m_regex(text + 1, regex, match_case);
    }
    return 0;
}

/*
 * Test one character against one parsed atom.
 *
 * ch        the character to test; only its unsigned-char value is used,
 *           so a negative (sign-extended) char cannot index out of bounds.
 * type      one of RE_ANY, RE_SET, RE_NOTSET, RE_ONE.
 * matchset  membership table filled in by m_regex() for this atom.
 *
 * Returns non-zero if `ch` is accepted, 0 otherwise (including for an
 * unknown `type`).
 */
static int matches_single(int ch, int type, char matchset[UCHAR_MAX+1])
{
    int in_set = matchset[(unsigned char) ch];

    switch (type) {
    case RE_ANY:
        return 1;
    case RE_SET:
    case RE_ONE:
        return in_set;
    case RE_NOTSET:
        return !in_set;
    default:
        return 0;
    }
}
/*
 * Case-sensitive entry point: match all of `text` against `regex`.
 * Passes the result of m_regex() through unchanged (per the file header:
 * 1 on a match, 0 on no match, -1 for a malformed expression).
 */
int matches_regex(const char *text, const char *regex)
{
    const int case_sensitive = 1;

    return m_regex(text, regex, case_sensitive);
}
/*
 * Case-insensitive entry point: match all of `text` against `regex`,
 * calling m_regex() with match_case set to 0. The result of m_regex()
 * is passed through unchanged.
 */
int matchesi_regex(const char *text, const char *regex)
{
    const int case_sensitive = 0;

    return m_regex(text, regex, case_sensitive);
}