std::string and case insensitive comparison

K

kwikius

John said:
// Case-insensitive equality test for two strings.
// Returns false as soon as the lengths differ or a pair of characters
// differs after lower-casing; returns true otherwise (including for two
// empty strings).
// NOTE: each character is handed to tolower() as a plain char; that call
// is the subject of the discussion that follows this snippet.
bool StrLowCompare( std::string& String1, std::string& String2 )
{
    if ( String1.size() != String2.size() )
        return false;
    for ( std::string::size_type i = 0; i < String1.size(); ++i )
    {
        // compare the characters at the same index of both strings
        if ( tolower( String1[i] ) != tolower( String2[i] ) )
            return false;
    }
    return true;
}

If I had a pound for every time this mistake is made I would be as rich
as Bill Gates.
tolower( String1[i] )

is undefined since char may be signed and therefore you may pass a
negative number to tolower. tolower is only defined on integer values in
the range of unsigned char and the value of EOF.
tolower( (unsigned char) String1[i] )

is correct.
This also means that
std::transform(str.begin(), str.end(), str.begin(), tolower)
is undefined for the same reason.

That wording is a little too harsh. The above code has perfectly
well-defined behavior for quite a lot of input values. To dismiss it as
undefined is like saying *p is undefined since p might be null. I agree,
however, that one can and should do better.

For the use in std::transform(), I would suggest a function object like
this:


<cut, reused in the following ..>

Can also use accumulate. Needs an iterator to work on the pair of
strings...

I'll leave it to the experts to work out how compliant the following
is;-)

tested in VC7.1 only
regards
Andy Little

#include <algorithm>
#include <iterator>
#include <locale>
#include <numeric>
#include <string>
#include <utility>

// iterator for comparing strings
// Not really sorted out what to do on different lengths:
// if pos >= length of the shorter string, pos is set to npos,
// i.e. just compare up to the end of the shorter string here
// Read-only iterator that walks two strings in lock step.
// Dereferencing yields a std::pair holding the characters found at the
// current position in the first and the second string.
//
// The position is set to Str::npos once it reaches the length of the
// shorter string, so every "past the end" iterator compares equal no matter
// which position it was constructed with. Only the position takes part in
// comparisons; comparing iterators over different string pairs is not
// detected. Dereferencing an iterator whose position is npos is not checked.
//
// The strings are referred to, not copied: they must outlive the iterator.
template <typename Str>
struct string_pair_iterator{

    typedef std::pair<
        typename Str::value_type , typename Str::value_type
    > value_type;
    // required by std::iterator_traits together with the other four typedefs
    typedef typename Str::difference_type difference_type;
    typedef value_type * pointer;
    typedef value_type & reference;
    typedef std::forward_iterator_tag iterator_category;

    // c1_in, c2_in : the two strings to traverse
    // pos_in       : start position; anything at or beyond the length of
    //                the shorter string produces the end iterator
    string_pair_iterator(
        Str const & c1_in,
        Str const & c2_in,
        typename Str::size_type pos_in =0
    )
    :pos(pos_in),c1(&c1_in),c2(&c2_in)
    {
        clamp_to_end();
    }

    bool operator==(string_pair_iterator const & rhs)const
    {
        return pos == rhs.pos;
    }

    bool operator!=(string_pair_iterator const & rhs)const
    {
        return pos != rhs.pos;
    }

    // Returns the pair of characters at the current position (by value).
    value_type operator *() const
    {
        return value_type((*c1)[pos],(*c2)[pos]);
    }

    // Pre-increment: returns a modifiable reference, as iterators are
    // expected to.
    string_pair_iterator & operator++()
    {
        advance();
        return *this;
    }

    // Post-increment: returns the iterator as it was before advancing.
    string_pair_iterator operator++(int)
    {
        string_pair_iterator temp = *this;
        advance();
        return temp;
    }

private:
    // Turns any position at or beyond the shorter string's length into npos.
    void clamp_to_end()
    {
        if (pos >= std::min(c1->length(),c2->length())){
            pos = Str::npos;
        }
    }

    // Moves one position forward; an end iterator stays an end iterator.
    void advance()
    {
        if (pos != Str::npos){
            ++pos;
            clamp_to_end();
        }
    }

    typename Str::size_type pos;
    // pointers rather than references so that the iterator stays assignable
    Str const * c1;
    Str const * c2;
};

//comparator
// F is char modifier as
// to_lower from Kai_Uwe Box below...

// Binary operation for std::accumulate over a sequence of character pairs.
// Str supplies the character type, F is the per-character transformation
// applied to both halves of a pair before they are compared.
template <typename Str, typename F>
struct string_pair_cmp{
    typedef std::pair<
        typename Str::value_type , typename Str::value_type
    > pair_type;

    string_pair_cmp(F const & f_in = F() )
    :f(f_in){}

    // b  : result accumulated so far
    // in : the next pair of characters
    // Once b is false the result stays false and f is not invoked.
    bool operator()(bool b,pair_type in)
    {
        return b && (f(in.first)==f(in.second));
    }
private:
    F f;
};

//courtesy Kai-Uwe Box..
// Function object that lower-cases a single character using a locale.
// The locale is stored by value: the constructor's default argument is a
// temporary, so keeping only a reference to it would leave the object with
// a dangling reference as soon as construction finished.
class to_lower {
    std::locale loc;
public:
    // r_loc : locale used for the conversion; defaults to a copy of the
    //         current global locale
    to_lower ( std::locale const & r_loc = std::locale() )
    : loc ( r_loc )
    {}
    // Returns chr converted to lower case according to the stored locale.
    template < typename CharT >
    CharT operator() ( CharT chr ) const {
        return( std::tolower( chr, this->loc ) );
    }

}; // class to_lower;


#include <iostream>
int main()
{
std::string str1 = "hellO";
std::string str2 = "Hello";

typedef string_pair_iterator<std::string> striter;
bool res1 = std::accumulate(
striter(str1,str2),
striter(str1,str2,std::min(str1.length(),str2.length())),
true,
string_pair_cmp<std::string,to_lower>())
;
std::cout << res1 <<'\n';

std::string str3 = "hell0";
bool res2 = std::accumulate(
striter(str1,str3),
striter(str1,str3,std::min(str1.length(),str3.length())),
true,
string_pair_cmp<std::string,to_lower>())
;
std::cout << res2 <<'\n';
}
 
K

kwikius

John Harrison wrote:
// Case-insensitive equality test for two strings.
// Returns false as soon as the lengths differ or a pair of characters
// differs after lower-casing; returns true otherwise (including for two
// empty strings).
// NOTE: each character is handed to tolower() as a plain char; that call
// is the subject of the discussion that follows this snippet.
bool StrLowCompare( std::string& String1, std::string& String2 )
{
    if ( String1.size() != String2.size() )
        return false;
    for ( std::string::size_type i = 0; i < String1.size(); ++i )
    {
        // compare the characters at the same index of both strings
        if ( tolower( String1[i] ) != tolower( String2[i] ) )
            return false;
    }
    return true;
}
If I had a pound for every time this mistake is made I would be as rich
as Bill Gates.
tolower( String1[i] )
is undefined since char may be signed and therefore you may pass a
negative number to tolower. tolower is only defined on integer values in
the range of unsigned char and the value of EOF.
tolower( (unsigned char) String1[i] )
is correct.
This also means that
std::transform(str.begin(), str.end(), str.begin(), tolower)
is undefined for the same reason.

That wording is a little too harsh. The above code has perfectly
well-defined behavior for quite a lot of input values. To dismiss it as
undefined is like saying *p is undefined since p might be null. I agree,
however, that one can and should do better.
For the use in std::transform(), I would suggest a function object like
this:

<cut, reused in the following ..>

Can also use accumulate. Needs an iterator to work on the pair of
strings...


On reflection a custom short circuit compare would be superior to
std::accumulate

// Returns true when f yields a true value for every element of [cur, end).
// Stops at the first element for which f fails, so later elements are not
// visited. An empty range gives true.
template <typename Iter,typename F>
bool true_for_each(Iter cur, Iter const & end, F const & f)
{
    for ( ; cur != end; ++cur){
        bool const failed = !f(*cur);
        if (failed){
            return false;
        }
    }
    return true;
}

regards
Andy little
 
K

kwikius

FWIW, here is a cleaner solution:

regards
Andy little


#include <locale>
#include <string>
#include <iterator>
#include <utility>
#include <algorithm>
#include <vector>

// random access containers only
// and only fwd iterator
// Read-only iterator over two containers traversed in lock step.
// Dereferencing yields a std::pair with the elements stored at the current
// index of the left-hand and the right-hand container, so the containers
// must support operator[].
//
// Only the index takes part in comparisons. No bounds checking is done:
// the caller chooses an end index that is valid for both containers.
// The containers are referred to, not copied: they must outlive the
// iterator.
template <typename Container>
struct pair_const_iterator{
    typedef Container container_type;
    typedef std::pair<
        typename Container::value_type,
        typename Container::value_type
    > value_type;
    // std::iterator_traits needs all five typedefs; without difference_type
    // standard algorithms such as std::find_if may reject this iterator
    typedef typename Container::difference_type difference_type;
    typedef value_type * pointer;
    typedef value_type & reference;
    typedef std::forward_iterator_tag iterator_category;

    // lhs_in, rhs_in : the two containers to traverse
    // pos_in         : index the iterator starts at
    pair_const_iterator(
        container_type const & lhs_in,
        container_type const & rhs_in,
        typename container_type::size_type pos_in
    ):pos(pos_in),lhs(&lhs_in),rhs(&rhs_in){}

    bool operator==(pair_const_iterator const & other)const
    {
        return pos == other.pos;
    }

    bool operator!=(pair_const_iterator const & other)const
    {
        return pos != other.pos;
    }

    // Returns the pair of elements at the current index (by value).
    value_type operator *() const
    {
        return value_type((*lhs)[pos],(*rhs)[pos]);
    }

    // Pre-increment: returns a modifiable reference, as iterators are
    // expected to.
    pair_const_iterator & operator++()
    {
        ++pos;
        return *this;
    }

    // Post-increment: returns the iterator as it was before advancing.
    pair_const_iterator operator++(int)
    {
        pair_const_iterator temp = *this;
        ++pos;
        return temp;
    }

private:
    typename container_type::size_type pos;
    // pointers rather than references so that the iterator stays assignable
    container_type const * lhs;
    container_type const * rhs;
};

// sequence consisting of a pair of
// readonly containers
template <typename Container>
struct const_container_pair{
typedef Container container_type;
typedef std::pair<
typename Container::value_type,
typename Container::value_type
> value_type;
typedef pair_const_iterator<Container> const_iterator;

const_container_pair(
container_type const & lhs_in, container_type const & rhs_in
): lhs(lhs_in),rhs(rhs_in),
end_pos(std::min(lhs_in.size(),rhs_in.size())){}

const_container_pair(
container_type const & lhs_in, container_type const & rhs_in,
typename container_type::size_type end_pos_in
): lhs(lhs_in),rhs(rhs_in), end_pos(end_pos_in){}

const_iterator begin() const
{
return const_iterator(lhs,rhs,0);
}
const_iterator end() const
{
return const_iterator(lhs,rhs,end_pos);
}
private:
container_type const &lhs;
container_type const & rhs;
typename container_type::size_type const end_pos;
};


// Predicate that applies F to two values and reports whether the results
// differ. It can be called with a single pair-like object (its first and
// second members are compared) or with the two values as separate arguments.
template <typename F>
struct not_equal_result{
    not_equal_result(F const & f_in = F() )
    :f(f_in){}

    // Pair form: compares f(in.first) with f(in.second).
    template <typename Pair>
    bool operator()(Pair const & in)const
    {
        return differ(in.first, in.second);
    }

    // Two-argument form: compares f(first) with f(second).
    template<typename T1, typename T2>
    bool operator()(T1 first, T2 second)const
    {
        return differ(first, second);
    }
private:
    // Shared comparison; takes references so that the arguments reach f
    // unchanged.
    template <typename A, typename B>
    bool differ(A & a, B & b)const
    {
        return f(a)!=f(b);
    }

    F f;
};
// Function object that hands its argument back unchanged, as a const
// reference to the very same object.
struct identity{
    template <typename Value>
    Value const & operator()(Value const & value)const
    {
        return value;
    }
};

//courtesy Kai-Uwe Box..
// Function object that lower-cases a single character using a locale.
// The locale is stored by value: the constructor's default argument is a
// temporary, so keeping only a reference to it would leave the object with
// a dangling reference as soon as construction finished.
class to_lower {
    std::locale loc;
public:
    // r_loc : locale used for the conversion; defaults to a copy of the
    //         current global locale
    to_lower ( std::locale const & r_loc = std::locale() )
    : loc ( r_loc )
    {}
    // Returns chr converted to lower case according to the stored locale.
    template < typename CharT >
    CharT operator() ( CharT chr ) const {
        return( std::tolower( chr, this->loc ) );
    }

}; // class to_lower;

#include <iostream>
int main()
{
std::string str1 = "hi";
std::string str2 = "HI";

const_container_pair<std::string> pair(str1,str2);

bool res = std::find_if(
pair.begin(), pair.end(),not_equal_result<to_lower>()
) == pair.end();
std::cout << res <<'\n';

str1 += "de";
str2 += "LL";

//n.b pair expcts immutable containers
// create another pair to reflect size change...
const_container_pair<std::string> paira(str1,str2);

bool resa = std::find_if(
paira.begin(), paira.end(),not_equal_result<to_lower>()
) == paira.end();
std::cout << resa <<'\n';

std::vector<int> v1;

v1.push_back(1);
v1.push_back(2);
v1.push_back(3);

std::vector<int> v2;
v2.push_back(1);
v2.push_back(2);
v2.push_back(3);

const_container_pair<std::vector<int> > pair1(v1,v2);

bool res1 = std::find_if(
pair1.begin(), pair1.end(),not_equal_result<identity>()
) == pair1.end();
std::cout << res1 <<'\n';

v1.push_back(-1);
v2.push_back(2);

//as above
const_container_pair<std::vector<int> > pair2(v1,v2);

bool res2 = std::find_if(
pair2.begin(), pair2.end(),not_equal_result<identity>()
) == pair2.end();
std::cout << res2 <<'\n';
}
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,770
Messages
2,569,583
Members
45,074
Latest member
StanleyFra

Latest Threads

Top