S
Somebody
Hi,
I'm trying to write the function below and have it working, but I
benchmarked it against strcmp() and its much slower. My test is to compare 2
strings that are identitical 100,000,000 times. Yeah, thats a lot, but I
needed to scale up the test to get a real timing on the function. The weird
thing is, even when I comment out almost the entire function, its still much
slower then strcmp()... at full implementation, strcmp() takes 5 seconds and
my function takes 78 seconds. If I comment everything out in my function
except the main while loop, it still takes 13 seconds. WTH?!?! I *AM* in
debug mode, but so is the crt library.
Basically the goal of this function is to perform a LOGICAL string compare
vs. a textual one. This is also called a NATURAL string compare by some...
For example, a normal string compare will sort as follows:
string1
string10
string2
where as the natural or logical string compare will sort as follows:
string1
string2
string10
taking digits into account as numbers.
the strings I'm comparing are two copies of:
"Sample description [1/10]"
I tried swapping out isdigit() with a quick & dirty macro, and that sped it
up quite a bit, but 5 vs. 78 seconds seems far off. I understand my function
is doing a lot more then strcmp(), but I was expecting maybe 20 to 30
seconds for my function. Not 78 seconds.
P.S. the if (n1 == n2) {} portion of the code is to sort zero padded #'s
correctly... ie... 01 should appear before 1.
I also tried implementing it with strtol()... that allowed me to get rid of
the two while loops that go to the end of the number and some of the pointer
math, but strtol() was sooooo much slower.
--- code below ---
#define SCLF_IGNORECASE 0x00000001
#define DIGIT(x) (x >= _T('0') && x <= _T('9'))
//#define DIGIT(x) isdigit(x)
int __cdecl StrCmpLogical(LPCTSTR lpsz1, LPCTSTR lpsz2, DWORD dwFlags)
{
if ((dwFlags != 0) && (dwFlags != SCLF_IGNORECASE))
return 0;
if ((lpsz1 != NULL) && (lpsz2 != NULL))
{
while (*lpsz1 != _T('\0'))
{
if (*lpsz2 == _T('\0'))
return 1;
if (/*_istdigit*/DIGIT(*lpsz1))
{
if (!/*_istdigit*/DIGIT(*lpsz2))
return -1;
int n1 = _ttoi(lpsz1);
int n2 = _ttoi(lpsz2);
if (n1 < n2)
return -1;
else if (n1 > n2)
return 1;
LPCTSTR lpszOrig1 = lpsz1;
LPCTSTR lpszOrig2 = lpsz2;
while (/*_istdigit*/DIGIT(*lpsz1))
lpsz1++;
while (/*_istdigit*/DIGIT(*lpsz2))
lpsz2++;
if (n1 == n2)
{
int nOffset1 = (int)(lpsz1 - lpszOrig1);
int nOffset2 = (int)(lpsz2 - lpszOrig2);
if (nOffset1 > nOffset2)
return -1;
else if (nOffset1 < nOffset2)
return 1;
}
}
else if (/*_istdigit*/DIGIT(*lpsz2))
{
return 1;
}
else
{
if (dwFlags & SCLF_IGNORECASE)
{
TCHAR ch1 = _totlower(*lpsz1);
TCHAR ch2 = _totlower(*lpsz2);
if (ch1 < ch2)
return -1;
else if (ch1 > ch2)
return 1;
}
else
{
if (*lpsz1 < *lpsz2)
return -1;
else if (*lpsz1 > *lpsz2)
return 1;
}
lpsz1++;
lpsz2++;
}
}
}
else
{
if ((lpsz1 == NULL) && (lpsz2 != NULL))
return -1;
if ((lpsz1 == NULL) && (lpsz2 == NULL))
return 0;
if ((lpsz1 != NULL) && (lpsz2 == NULL))
return 1;
}
return 0;
}
I'm trying to write the function below and have it working, but I
benchmarked it against strcmp() and its much slower. My test is to compare 2
strings that are identitical 100,000,000 times. Yeah, thats a lot, but I
needed to scale up the test to get a real timing on the function. The weird
thing is, even when I comment out almost the entire function, its still much
slower then strcmp()... at full implementation, strcmp() takes 5 seconds and
my function takes 78 seconds. If I comment everything out in my function
except the main while loop, it still takes 13 seconds. WTH?!?! I *AM* in
debug mode, but so is the crt library.
Basically the goal of this function is to perform a LOGICAL string compare
vs. a textual one. This is also called a NATURAL string compare by some...
For example, a normal string compare will sort as follows:
string1
string10
string2
where as the natural or logical string compare will sort as follows:
string1
string2
string10
taking digits into account as numbers.
the strings I'm comparing are two copies of:
"Sample description [1/10]"
I tried swapping out isdigit() with a quick & dirty macro, and that sped it
up quite a bit, but 5 vs. 78 seconds seems far off. I understand my function
is doing a lot more then strcmp(), but I was expecting maybe 20 to 30
seconds for my function. Not 78 seconds.
P.S. the if (n1 == n2) {} portion of the code is to sort zero padded #'s
correctly... ie... 01 should appear before 1.
I also tried implementing it with strtol()... that allowed me to get rid of
the two while loops that go to the end of the number and some of the pointer
math, but strtol() was sooooo much slower.
--- code below ---
#define SCLF_IGNORECASE 0x00000001
#define DIGIT(x) (x >= _T('0') && x <= _T('9'))
//#define DIGIT(x) isdigit(x)
int __cdecl StrCmpLogical(LPCTSTR lpsz1, LPCTSTR lpsz2, DWORD dwFlags)
{
if ((dwFlags != 0) && (dwFlags != SCLF_IGNORECASE))
return 0;
if ((lpsz1 != NULL) && (lpsz2 != NULL))
{
while (*lpsz1 != _T('\0'))
{
if (*lpsz2 == _T('\0'))
return 1;
if (/*_istdigit*/DIGIT(*lpsz1))
{
if (!/*_istdigit*/DIGIT(*lpsz2))
return -1;
int n1 = _ttoi(lpsz1);
int n2 = _ttoi(lpsz2);
if (n1 < n2)
return -1;
else if (n1 > n2)
return 1;
LPCTSTR lpszOrig1 = lpsz1;
LPCTSTR lpszOrig2 = lpsz2;
while (/*_istdigit*/DIGIT(*lpsz1))
lpsz1++;
while (/*_istdigit*/DIGIT(*lpsz2))
lpsz2++;
if (n1 == n2)
{
int nOffset1 = (int)(lpsz1 - lpszOrig1);
int nOffset2 = (int)(lpsz2 - lpszOrig2);
if (nOffset1 > nOffset2)
return -1;
else if (nOffset1 < nOffset2)
return 1;
}
}
else if (/*_istdigit*/DIGIT(*lpsz2))
{
return 1;
}
else
{
if (dwFlags & SCLF_IGNORECASE)
{
TCHAR ch1 = _totlower(*lpsz1);
TCHAR ch2 = _totlower(*lpsz2);
if (ch1 < ch2)
return -1;
else if (ch1 > ch2)
return 1;
}
else
{
if (*lpsz1 < *lpsz2)
return -1;
else if (*lpsz1 > *lpsz2)
return 1;
}
lpsz1++;
lpsz2++;
}
}
}
else
{
if ((lpsz1 == NULL) && (lpsz2 != NULL))
return -1;
if ((lpsz1 == NULL) && (lpsz2 == NULL))
return 0;
if ((lpsz1 != NULL) && (lpsz2 == NULL))
return 1;
}
return 0;
}