Is this String class properly implemented?

K

kevintse.onjee

I am new to the C++ language, I implement this String class just for
practising.
I want to know what to improve to make it *qualified* to be used in
real programs, or what code should be changed to make it more
efficient...yea, I know you will recommend me to using std::string,
(-:just practising...

Here's the code:
#ifndef STRING_H_
#define STRING_H_
#include "Core.h"

class String{
private:
wchar_t* value;
int size;
public:
String();
String(const wchar_t* src);
String(const wchar_t* src, int offset, int count);
String(String& s);
String(const int& num);
String(const long& l);
String(const float& f);
String(const double& d);
String(const wchar_t& c);
operator const wchar_t* () const {
return value;
}
~String(){
size = 0;
delete [] value;
}
int length() const;
const wchar_t& wchar_tAt(int index) const;
//the length of the substring is endIndex-beginIndex
String substring(int beginIndex, int endIndex) const;
String substring(int beginIndex) const;
int indexOf(const wchar_t& c) const;
int indexOf(const wchar_t& c, int fromIndex) const;
int indexOf(const String& s) const;
int indexOf(const String& s, int fromIndex) const;
int indexOf(const wchar_t* s) const;
int indexOf(const wchar_t* s, int fromIndex) const;

int lastIndexOf(wchar_t c) const;
int lastIndexOf(wchar_t c, int fromIndex) const;
int lastIndexOf(const String& s) const;
int lastIndexOf(const String& s, int fromIndex) const;
int lastIndexOf(const wchar_t* s) const;
int lastIndexOf(const wchar_t* s, int fromIndex) const;
/*
//these two functions cannot be properly implemented using plain
pointers, an Array class is required?
String** split(const wchar_t& c) const;
String** split(const String& s) const;
*/
String toLowerCase() const;
String toUpperCase() const;
bool isEmpty() const;
bool startsWith(const String* s) const;
bool endsWith(const String* s) const;
bool startsWith(const wchar_t* s) const;
bool endsWith(const wchar_t* s) const;
bool equals(const String* s) const;
bool equals(const wchar_t* s) const;
String trim();
String trimLeft();
String trimRight();
const wchar_t* toCString() const;
String& operator+=(const String& str);
String& operator+=(const wchar_t* str);
String operator=(String& str);
String operator=(const wchar_t* str);
String operator+(const wchar_t* str) const;
bool operator==(const String* s) const;
};

String operator+(const String& str1, const String& str2);
String operator+(const wchar_t* str1, const String& str2);
#endif

#include "String.h"

String::String(){
size = 0;
value = new wchar_t[1]; //one wchar_t to store the terminating null
value[0] = '\0';
}

String::String(const int& num) {
if(num == minInteger)
String(L"-2147483648");
else{
int lenOfNumeric = num < 0 ? ::getLengthOfNumeric<int>(-num) +
1 : ::getLengthOfNumeric<int>(num);
value = new wchar_t[lenOfNumeric + 1];
::getChars(num, value, lenOfNumeric);
value[lenOfNumeric] = '\0';
size = lenOfNumeric;
}
}

String::String(const long& num) {
if(num == minInteger)
String(L"-2147483648");
else{
int lenOfNumeric = num < 0 ? ::getLengthOfNumeric<long>(-num) +
1 : ::getLengthOfNumeric<long>(num);
value = new wchar_t[lenOfNumeric + 1];
::getChars(num, value, lenOfNumeric);
value[lenOfNumeric] = '\0';
size = lenOfNumeric;
}
}

//will fail if str is null
String::String(const wchar_t* str){
size = wcslen(str);
value = new wchar_t[size + 1];
if(size > 0)
wcscpy(value, str);
else
value[0] = '\0';
}

//copy constructor
String::String(String& s){
size = s.length();
if(size == 0){
value = new wchar_t[1];
value[0] = '\0';
}else{
value = new wchar_t[size + 1];
wcscpy(value, s.value);
}
}

//will fail if str is null
String::String(const wchar_t* str, int offset, int count){
size = 0;
int len = wcslen(str);
if(len == 0 || offset >= len){
//in this case, create an empty String
value = new wchar_t[1];
value ='\0';
}else{
if(offset < 0) offset = 0;
if(count > len - offset) count = len - offset;
value = new wchar_t[count + 1];
int index = 0;
int endIndex = offset + count;
for(int i = offset; i < endIndex; ++i){
value[index] = str;
++index;
}
value[index] = '\0';
//only in this case will the size of the String be changed
size = count;
}
}

int String::length() const {
return size;
}

const wchar_t& String::wchar_tAt(int index) const{
if(size == 0 || index < 0 || index >= size) throw
StringIndexOutOfBoundsException(new String(L"out of bounds."));
return value[index];
}

String String::substring(int beginIndex, int endIndex) const {
if(size == 0 || beginIndex < 0 || beginIndex >= size) throw
StringIndexOutOfBoundsException(new String(L"out of bounds."));
if(endIndex > size) throw StringIndexOutOfBoundsException(new String
(L"out of bounds."));
if(beginIndex > endIndex) throw StringIndexOutOfBoundsException(new
String(L"out of bounds."));

wchar_t* temp = new wchar_t[endIndex - beginIndex + 1];
int index = 0;
for(int i = beginIndex; i < endIndex; ++i){
temp[index] = value;
++index;
}
temp[index] = '\0';
String s(temp);
delete [] temp;
return s;
}

String String::substring(int beginIndex) const{
if(size == 0 || beginIndex < 0 || beginIndex >= size) throw
StringIndexOutOfBoundsException(new String(L"out of bounds."));
return substring(beginIndex, size);
}

int String::indexOf(const wchar_t& c) const {
return indexOf(c, 0);
}

int String::indexOf(const wchar_t& c, int fromIndex) const {
if(fromIndex < 0) fromIndex = 0;
else if(fromIndex >= size) return -1;
for(int i = fromIndex; i < size; ++i){
if(value == c)
return i;
}
return -1;
}

int String::indexOf(const String& s) const {
return indexOf(s.toCString());
}

int String::indexOf(const String& s, int fromIndex) const {
return indexOf(s.toCString(), fromIndex);
}

int String::indexOf(const wchar_t* s) const {
return indexOf(s, 0);
}

int String::indexOf(const wchar_t* s, int fromIndex) const {
if(!s || size == 0) return -1;
if(fromIndex < 0) fromIndex = 0;
else if(fromIndex >= size) return -1;
int len = wcslen(s);
if(len == 0) return -1;
if(len + fromIndex > size) return -1;
int countMatched = 0;
int firstFoundIndex = 0;
for(int i = fromIndex; i < size; ++i){
firstFoundIndex = i;
countMatched = 0;
if(value == s[countMatched]){//found first letter
do{
++countMatched;
if(countMatched == len) return i; //if all chars in "s" are found
in a row, then the search is a success, return the index
++firstFoundIndex;
}while(firstFoundIndex < size && value[firstFoundIndex] == s
[countMatched]); //ensures that the loop does not step over bounds
}
}
return -1;
}

int String::lastIndexOf(wchar_t c) const {
return lastIndexOf(c, size - 1);
}
int String::lastIndexOf(wchar_t c, int fromIndex) const {
if(size == 0 || fromIndex < 0) return -1;
if(fromIndex >= size) fromIndex = size - 1;
for(int i = fromIndex; i >= 0; --i){
if(value == c)
return i;
}
return -1;
}

int String::lastIndexOf(const String& s) const {
return lastIndexOf(s.toCString(), size - 1);
}
int String::lastIndexOf(const String& s, int fromIndex) const {
return lastIndexOf(s.toCString(), fromIndex);
}

int String::lastIndexOf(const wchar_t* s) const {
return lastIndexOf(s, size - 1);
}

int String::lastIndexOf(const wchar_t* s, int fromIndex) const {
if(!s || size == 0) return -1;
if(fromIndex < 0) return -1;
else if(fromIndex >= size) fromIndex = size - 1;
int len = wcslen(s);
if(len == 0) return -1;
int countMatched = 0;
int firstFoundIndex = 0;
for(int i = fromIndex; i >= 0; --i){
firstFoundIndex = i;
countMatched = 0;
if(value == s[countMatched]){//found first letter
do{
++countMatched;
if(countMatched == len) return i; //if all chars in "s" are found
in a row, then the search is a success, return the index
++firstFoundIndex;
}while(firstFoundIndex < size && value[firstFoundIndex] == s
[countMatched]); //ensures that the loop does not step over bounds
}
}
return -1;
}


String String::toLowerCase() const {
if(size == 0) return String();
wchar_t* temp = new wchar_t[size + 1];
for(int i = 0; i < size; ++i){
if(value >= 65 && value <= 90)
temp = value | 0x20; //convert to lower case
else
temp = value;
}
temp[size] = '\0';
String s(temp);
delete [] temp;
return s;
}
String String::toUpperCase() const {
if(size == 0) return String();
wchar_t* temp = new wchar_t[size + 1];
for(int i = 0; i < size; ++i){
if(value >= 97 && value <= 122)
temp = value & 0x5F; //convert to upper case
else
temp = value;
}
temp[size] = '\0';
String s(temp);
delete [] temp;
return s;
}

bool String::isEmpty() const {
if(size > 0) return false;
return true;
}

String String::trimLeft() {
if(size == 0) return *this;
int beginIndex = 0;
if(value[beginIndex] == ' '){ //if the String starts with a space
while(++beginIndex < size)
if(value[beginIndex] != ' ')
return substring(beginIndex, size);
}
return *this;
}

String String::trimRight() {
if(size == 0) return *this;
int endIndex = size - 1;
if(value[endIndex] == ' '){ //if the String ends with a space,
which precedes the terminating '\0'
while(--endIndex >= 0)
if(value[endIndex] != ' ')
return substring(0, ++endIndex); //++endIndex is a must, since
length of the substring is endIndex-beginIndexx
}
return *this;
}

String String::trim(){
if(size == 0) return *this;
int beginIndex = 0;
int endIndex = size - 1;

if(value[beginIndex] == ' '){ //if the String starts with a space
while(++beginIndex < size)
if(value[beginIndex] != ' ')
break;
}
if(value[endIndex] == ' '){ //if the String ends with a space,
which precedes the terminating '\0'
while(--endIndex >= 0)
if(value[endIndex] != ' '){
++endIndex;
break;
}
}
if(beginIndex != 0 || endIndex != size - 1)
return substring(beginIndex, endIndex);
return *this;
}

bool String::startsWith(const String* s) const {
if(!s) return false;
if(this == s || (size == 0 && s->length() == 0)) return true;
if(size < s->length()) return false;
for(int i = 0; i < s->length(); ++i){
if(value != s->value)
return false;
}
return true;
}

bool String::startsWith(const wchar_t* s) const {
if(!s) return false;
int len = wcslen(s);
if(size == 0 && len == 0) return true;
if(size < len) return false;
for(int i = 0; i < len; ++i){
if(value != s)
return false;
}
return true;
}

bool String::endsWith(const String* s) const {
if(!s) return false;
if(this == s || (size == 0 && s->length() == 0)) return true;
if(size < s->length()) return false;
int oriStrIndex = size;
for(int i = s->length() - 1; i >= 0; --i){
if(value[--oriStrIndex] != s->value)
return false;
}
return true;
}

bool String::endsWith(const wchar_t* s) const {
if(!s) return false;
int len = wcslen(s);
if(size == 0 && len == 0) return true;
if(size < len) return false;
int oriStrIndex = size;
for(int i = len - 1; i >= 0; --i){
if(value[--oriStrIndex] != s)
return false;
}
return true;
}

bool String::equals(const wchar_t* s) const {
if(!s) return false;
int len = wcslen(s);
if(size == 0 && len == 0) return true;
if(size != len) return false;
for(int i = 0; i < size; ++i){
if(value != s) return false;
}
return true;
}

bool String::equals(const String* s) const {
if(!s) return false;
if(this == s || (size == 0 && s->length() == 0)) return true;
if(size != s->length()) return false;
for(int i = 0; i < size; ++i){
if(value != s->value) return false;
}
return true;
}

/*

static String valueOf(long l);
static String valueOf(float f);
static String valueOf(double d);
static String valueOf(wchar_t c);
*/

String operator +(const String& str1, const String& str2){
if(str1.length() == 0 && str2.length()) return String();
wchar_t* temp = new wchar_t[str1.length() + str2.length() + 1];
temp[0] = '\0';
if(str1.length() > 0)
wcscpy(temp, str1.toCString());
if(str2.length() > 0)
wcscat(temp, str2.toCString());
String s(temp);
delete [] temp;
return s;
}

String String::eek:perator +(const wchar_t* str) const{
if(!str) return String(value);
int len = wcslen(str);
if(len == 0) return String(value);
wchar_t* temp = new wchar_t[size + len + 1];
temp[0] = '\0';
wcscpy(temp, value);
wcscat(temp, str);
String s(temp);
delete [] temp;
return s;
}

String operator+(const wchar_t* str1, const String& str2) {
int size = 0;
if(!str1) size = wcslen(str1);
wchar_t* temp = new wchar_t[size + str2.length() + 1];
temp[0] = '\0';
if(size > 0)
wcscpy(temp, str1);
if(str2.length() > 0)
wcscat(temp, str2.toCString());
String s(temp);
delete [] temp;
return s;
}

String& String::eek:perator +=(const String& str) {
if(str.length() == 0) return *this;
size += str.length();
wchar_t* temp = new wchar_t[size + 1];
temp[0] = '\0';
wcscpy(temp, value);
wcscat(temp, str.toCString());
delete [] value;
value = temp;
return *this;
}

String& String::eek:perator +=(const wchar_t* str) {
if(!str) return *this;
int len = wcslen(str);
if(len == 0) return *this;
size += len;
wchar_t* temp = new wchar_t[size + 1];
temp[0] = '\0';
wcscpy(temp, value);
wcscat(temp, str);
delete [] value;
value = temp;
return *this;
}

String String::eek:perator =(String& str) {
return str;
}
String String::eek:perator =(const wchar_t* str) {
if(!str) return String();
return String(str);
}

bool String::eek:perator ==(const String* s) const{
return this == s;
}
//ensures that the returned C-style string cannot be changed
const wchar_t* String::toCString() const {
return value;
}
 
S

SG

I am new to the C++ language, I implement this String class just for
practising.
Ok.

class String{
private:
        wchar_t* value;
        int size;

The typical approach would be to use two integers (of some type like
std::size_t). The 2nd integer -- usually called "capacity" -- stores
the size of the array whereas "size" (the logical length of the
string) may be smaller. I didn't check your implementation for
operator+= but you usually don't want to allocate a new character
array every time you add a new character to the string.

You also may want to throw out some member functions and replace them
with free functions if appropriate. std::string is an example of a
"bloated super class".
public:
        String(String& s);

Your copy constructor doesn't take a referece-to-const?
        operator const wchar_t* () const {
                return value;
        }

This is dangerous since this allows *implicit* conversion to a pointer
that points to memory that is *managed* by the string object. If the
string object dies and deletes the char array you basically have an
invalid pointer. Example:

String foo();

void bar() {
wchar_t * invalid = foo();
// accessing "invalid" invokes undefined behaviour
}

This is why std::string has a c_str() member function for this as
opposed to an implicit conversion operator.

Also: Consider making some constructors explicit. Check your textbook
on explicit constructors and conversion.
        ~String(){
                size = 0;
                delete [] value;
        }

"size = 0;" is pretty much useless here.
        //these two functions cannot be properly implemented using
// plain pointers, an Array class is required?
        String** split(const wchar_t& c) const;
        String** split(const String& s) const;

It's not clear from the signature and the functions' names what it is
exactly the functions do and return. Do you intent do create String
objects dynamically as well as an array of String pointers? Please
don't. Instead, write a *free* function (not a member function) that
does the splitting. This free function could return a vector<String>
object or it could be a function template that takes an "output
iterator" like this:

template<typename OutIter>
void split(String const& str, wchar_t at, OutIter oiter) {
...
*oiter = some_substring;
++oiter;
...
}

which allows you to write

void foo(String const& x) {
vector<String> blah;
split(x,'.',back_inserter(blah));
}

        String trim();
        String trimLeft();
        String trimRight();

....are just three examples that could have been free functions
instead. Also, since you seem to be creating a new String object for
the result as return value you seem to have forgotten 'const'
qualifiers.
        String operator=(String& str);

Your copy assignment doesn't take a reference-to-const String?
#include "String.h"

String::String(){
        size = 0;
        value = new wchar_t[1]; //one wchar_t to store the terminating null
        value[0] = '\0';
}

You might want to delay the allocation until you need to return a
wchar_t pointer to a zero terminated string. This should make
creating empty strings faster.


[snipped a whole lot of code I'm too lazy to go through]


Cheers!
SG
 
S

SG

Oh, I have clicked the wrong link, and only replied to the author SG

Let me include it here.

Actually, I intended to write an *unmutable* String, which only had
logical length, its length was not supposed to be changeable. This is
also the Java implementation of the String class. huh, I took the idea
from Java.
OK.
This is dangerous since this allows *implicit* conversion to a pointer
that points to memory that is *managed* by the string object. If the
string object dies and deletes the char array you basically have an
invalid pointer.

So if I want to return a C style string, I have to allocate new memory
for the string? and let the caller of the function deal with the
deallocation of the memory? But, most of the time, that I call this
function is just to obtain a C style string that can be output with
"iostream" related functions.

I haven't said anything against returning your internal wchar_t*
converted to pointer-to-const. I reasoned against an implicit
conversion operator.
~String(){
size = 0;
delete [] value;
}

"size = 0;" is pretty much useless here.

Set size to zero is necessary, because I return "size" directly when
the "length()" function is called, this function tells the actual size
of the String. And this is definitely more efficient than calling
"wcslen(value)" everytime we need a "size".

You're not entirely familiar with the concept of destructors, are you?
I was refering to the Java implementation of the class in which these
functions return new Strings, so, you know, I copied... oh, Java does
not have a pointer, or a reference...

I know that. But that's beside the point.
OK, these functions are all supposed to return references for better
efficiency, cause I am using C++.

....and what kind of reference would that be? A reference to the
object itself? Do you want these functions to mutate the String
object or just return a modified copy?

Never ever return a reference to a non-static function-local object.
Never ever.
Oh, I have forgotten that...


Cheers!
SG
 
S

SG

Actually, I intended to write an *unmutable* String, which only had
logical length, its length was not supposed to be changeable. This is
also the Java implementation of the String class. huh, I took the idea
from Java.

You can do that, of course. But when you write

String foo = "hello";
String bar = foo;

in JAVA you're only copying a reference and not the object. So, in
order to emulate this in C++ you would use a reference-counted
character array that can be shared among multiple instances of your
String class.

Otherwise every String object would manage its own character array and
there would be no point in preventing any mutations.


Cheers!
SG
 
K

kevintse.onjee

I haven't said anything against returning your internal wchar_t*
converted to pointer-to-const. I reasoned against an implicit
conversion operator.

std::c_str() does return a pointer to its internal char array, too. I
still don't see the difference between using a function and implicit
conversion to return the internal wchar_t*, both methods are
*dangerous*, aren't they?
Actually, I have another function "toCString()" which does the same
thing as the implicit conversion, but it was weird enough that when I
tried to test this String class with "std::wcout", "std::wcout <<
myString" output numbers(as we know, wchar_t is unsigned short), while
"std::wcout << myString.toCString()" output exactly what I put in the
myString object...
~String(){
size = 0;
delete [] value;
}
"size = 0;" is pretty much useless here.
Set size to zero is necessary, because I return "size" directly when
the "length()" function is called, this function tells the actual size
of the String. And this is definitely more efficient than calling
"wcslen(value)" everytime we need a "size".

You're not entirely familiar with the concept of destructors, are you?

The String itself is already out of scope when the destructor is
called automatically, and there's no any chance the "length()"
function will be called, right?
I know that. But that's beside the point.


...and what kind of reference would that be? A reference to the
object itself? Do you want these functions to mutate the String
object or just return a modified copy?

Never ever return a reference to a non-static function-local object.
Never ever.

what is "non-static function-local object"?
Cheers!
SG

Kevin
Regards
 
S

SG

std::c_str() does return a pointer to its internal char array, too. I
still don't see the difference between using a function and implicit
conversion to return the internal wchar_t*, both methods are
*dangerous*, aren't they?

Yes, but one more than the other because the implicit conversion can
be triggered unintentionally by the programmer. In your case you rely
on the fact that the original String objects lives long enough. So,
it's not really "converted" into some other form of representation.
what is "non-static function-local object"?

Don't be lazy. Search the web or look into your favorite C++ textbook.


Cheers!
SG
 
A

Alf P. Steinbach

* Daniel T.:
That said, your class is largely immutable, the sole mutators are the
two op+= and the two op= functions. If you went all the way and removed
all four of them, you could make your class even more time efficient by
using a data sharing implementation. If you choose to keep these four
functions, then maybe you should consider including more functions for
mutation and go with an implementation that is more efficient in the
face of mutation.

Assignment/concatenation works well with immutable strings (don't know about the
OP's code, though).

It's much of the point.


Cheers & hth.,

- Alf
 
S

SG

Assignment/concatenation works well with immutable strings

IMHO, "immutable strings" can be misunderstood. I think by "strings"
you refer to the internal char array representation as opposed to a
user-defined string class object that manages this char array. I
would still want to mutate those string objects in the same way I can
change the value of some int variable. :)

Cheers!
SG
 
A

Alf P. Steinbach

* Daniel T.:
Tell us more.

Seems to me that assignment and concatenation aren't
approprate in an immutable type:

String a("hello");
String b = a;
assert(b.length() == a.length());
OK.


a += " world";
assert(b.length() == a.length()); // how could this not fire?

The assertion here does not hold.

and if it does [not hold], how is 'a' immutable?

'a' refers to an immutable string value, a string value that can't be modified.

You can change which string value 'a' refers to.

You can not change the string value itself (although 'a' can, when it can
guarantee that it is the one and only reference, which it easily can).

That is the only practical usage of the word "immutable" for strings, and it is
(not just in fact but also necessarily) the meaning of "immutable" when we refer
to strings in other languages, or certain C++ string classes, as immutable.


Cheers & hth.,

- Alf
 
A

Alf P. Steinbach

* Daniel T.:
Lot's of people read your sage advice, even if they agree with it. :)

He he. :)

Seems to me that assignment and concatenation aren't appropriate in
an immutable type:

String a("hello");
String b = a;
assert(b.length() == a.length());
a += " world";
assert(b.length() == a.length()); // how could this not fire?
The assertion here does not hold.
and if it does [not hold], how is 'a' immutable?
'a' refers to an immutable string value, a string value that can't
be modified.

You can change which string value 'a' refers to.

This treats 'string' as a 'const char*' replacement doesn't it? But a
'const char*' is mutable (although that which it points to is not,) and
as such, calling 'string' immutable would be a misnomer.
That is the only practical usage of the word "immutable" for
strings, and it is (not just in fact but also necessarily) the
meaning of "immutable" when we refer to strings in other languages,
or certain C++ string classes, as immutable.

I can see your point when it comes to languages that treat variables as
pointers to objects (without the pointer arithmetic of course.) The
object may not be mutable, but the variable can be assigned to hold
other objects. But even in those cases, op+= isn't really appropriate is
it? And '.append()' would return a different object and thus require
re-assignment.

I can see your point. We could consider "that which a string contains"
as immutable and then any non-const member-function would reseat the
string rather than changing its contents, but if that was the case, why
stop with op= and op+=? Why not go ahead and implement a whole host of
other non-const functions?

Efficiency and the principle of least surprise.

op+= can be really, really efficient whether the string is immutable or not,
because it can provide time amortized linear in the total result length simply
by increasing the buffer by some factor (usually 2) on reallocation.

And for std::string it is efficient that way. Unfortunately, in common Java
implementations it reportedly isn't, even though it could very easily be. It
wouldn't really help with better Java implementations either, since what matters
is the *guaranteed* behavior, amortized linear, versus possibly quadratic.

However, for an immutable string an operation such as setChar( index, value )
would have O(n) behavior in the length of the string, instead of constant time.
It could have amortized constant time for the special case of repeatedly
modifying the same string a number of times proportional to the string length,
but it could then be surprising to most that in usual cases of doing single
char-changing the setChar operation would have linear time.

So assignment and concatenation (and not to forget, substring, at constant
time!) are easy to do very efficiently with essentially no surprise factor, no
gremlins jumping up and biting the careless programmer, while an operation such
as setChar could only be efficient for special cases.


Cheers & hth.,

- Alf
 
K

kevintse.onjee

In the functions: String::String(const int&) and String::String(const
long&), you are putting the null terminator at value[size] but in other
functions you are putting the null terminator at value[size + 1].

No, I allocate "size + 1" memory for all cases, and put the null
terminators at "size" position.
Also, those two functions are identical. They shouldn't be identical,
but even so, if properly implemented they would be almost identical.
Consider breaking out the parts that are alike into a separate function.

Yea, they shouldn't be identical. But they seem to have the same
maximum value and minimum value in most platforms, so you can see that
almost all the code are identical besides the "::getLengthOfNumeric"
function invocation.
For const wchar_t& String::wchar_tAt(int index) const... Doing something
within a throw that might cause a throw (in this case, allocating
memory) is bad form.

The function name was charAt(), I did a global replacement in the
whole file and not notice this until now.
The new String(L"out of bounds.") instantiation is hard coded, I can
ensure that no exceptions will be thrown as long as memory allocation
for this string succeeds. And for the case of insufficient memory, I
can't do anything, can I?
String String::substring(int beginIndex, int endIndex) const... in the
last constructor I mentioned above, you corrected offset and count when
they were bad values, but here you throw if beginIndex and endIndex are
bad values. Consistency is important.

You are right. For consistency, I may have to throw exceptions in all
cases.
You have an "operator const wchar_t* () const" and a "const wchar_t*
toCString() const" that both do the same thing. Others have already
suggested you remove the former. Generally, having two member-functions
that do the exact same thing is unnecessary duplication (though it
sometimes makes sense in the face of inheritance, but even then one of
the functions should be implemented in terms of the other.)

There was only a toCString() function at the first place, I added the
implicit conversion just for tasting the semantics sugar, I just knew
about implicit conversion two days ago, but I didn't know this would
cause problems in some cases.

If you are going to consistently put a null terminator on the end of
your 'value' array, you don't really need the 'size' variable at all;
it's a rather minor optimization that is more than overwhelmed by the
fact that your class has to reallocate the value array every time
someone increases the length of the string.

I maintain this size variable just for that minor optimization and
convenient use, because in some cases I will have to query the length
of the string many times, I can simply call the length (which returns
size directly) function as many time as needed, and I don't need to
declare a new variable to hold the length of the string.

I provide two operator overloading functions that will reallocate
memory for the internal wchar_t*, this is just for convenient use too,
this class is supposed to be used as an unmutable one in most cases,
if I have to do lots of concatenation, I will use another
implementation of the this class, it is called a StringBuffer in Java,
which is a typical mutable string class.
 
K

kevintse.onjee

String String::toLowerCase() const {
if(size == 0) return String();
wchar_t* temp = new wchar_t[size + 1]; for(int i = 0; i < size; + +i){
if(value >= 65 && value <= 90)
temp = value | 0x20; //convert to lower case
else
temp = value;
}
temp[size] = '\0';
String s(temp);
delete [] temp;
return s;
}
String String::toUpperCase() const {
if(size == 0) return String();
wchar_t* temp = new wchar_t[size + 1]; for(int i = 0; i < size; + +i){
if(value >= 97 && value <= 122)
temp = value & 0x5F; //convert to upper case
else
temp = value;
}
temp[size] = '\0';
String s(temp);
delete [] temp;
return s;
}


is completely useless from my perspective. How about yours?

String(L"åäö").toUpperCase(); // <-!?


But this is convenient when you need to do case conversions of English
words.
 
M

Martin Eisenberg

String::String(const int& num) {
if(num == minInteger)
String(L"-2147483648");

That line makes a temporary object that is immediately destroyed
again. The String under construction does not get initialized.
Seemingly you haven't exercised this execution path. The only way to
really put code through its paces is programmatic testing; see "unit
test".

Also, I haven't seen a definition of minInteger in your post --
perhaps it's in Core.h, but arguably this is more obscure than
overtly mentioning std::numeric_limits here.
value = new wchar_t[1];
value[0] = '\0';

You keep writing that in any number of places. The definition of what
initializing to empty means should be expressed exactly once,
probably in the same private function you need to correct the above-
mentioned mistake.
throw StringIndexOutOfBoundsException(
new String(L"out of bounds."));

Even if the "new" doesn't fail as someone cautioned against, who will
delete the error message? Probably the exception class; better though
to make the question moot. Just store a pointer to the literal like
std exceptions do, or at least use a smart pointer.


Martin
 
J

James Kanze

String String::toLowerCase() const {
if(size == 0) return String();
wchar_t* temp = new wchar_t[size + 1]; for(int i = 0; i < size; + +i){
if(value >= 65 && value <= 90)
temp = value | 0x20; //convert to lower case
else
temp = value;
}
temp[size] = '\0';
String s(temp);
delete [] temp;
return s;
}
String String::toUpperCase() const {
if(size == 0) return String();
wchar_t* temp = new wchar_t[size + 1]; for(int i = 0; i < size; ++i){
if(value >= 97 && value <= 122)
temp = value & 0x5F; //convert to upper case
else
temp = value;
}
temp[size] = '\0';
String s(temp);
delete [] temp;
return s;
}

is completely useless from my perspective. How about yours?
String(L"åäö").toUpperCase(); // <-!?

But this is convenient when you need to do case conversions of English
words.

No. About the only use it could have is as an example of how
not to do the job. It fails on implementations not using ASCII,
for example, and it fails for English words like "naïve".
 
J

James Kanze

IMHO, "immutable strings" can be misunderstood. I think by
"strings" you refer to the internal char array representation
as opposed to a user-defined string class object that manages
this char array. I would still want to mutate those string
objects in the same way I can change the value of some int
variable. :)

C++ supports value semantics, and a string class really should
behave as a value. This means no mutator functions, but does
allow using assignment to change the value. Because C++ also
supports things like += on built-in value types, one can also
argue that a string class should support it. On the other hand,
any possibility of modifying a string without using an = sign is
poor design (because of the lack of identity).
 
J

James Kanze

Immutable: not subject or susceptible to change or variation
in form or quality or nature.
In that case, what does it mean for the string to be
immutable? After all, you can think of an int as an array of
bits, each of which can be accessed individually and modified,
should an 'immutable' string then allow users to access and
modify individual bytes (or bits for that matter)?

You can assign to an int, to change its value. For that matter,
you can write things like i |= 0x40. (I'm not sure what the
equivalent should be with a string.)
 
J

James Kanze

* Daniel T.:

[...]
You can change which string value 'a' refers to.
You can not change the string value itself (although 'a' can,
when it can guarantee that it is the one and only reference,
which it easily can).
That is the only practical usage of the word "immutable" for
strings, and it is (not just in fact but also necessarily) the
meaning of "immutable" when we refer to strings in other
languages, or certain C++ string classes, as immutable.

I think I agree with you, but just to be sure we're on the same
wavelength: what you're basically saying is that (supposing all
variables have string type):

a = b ; // fine
a += b ; // also OK
a.replace( 5, 3, b ) ; // no, except that
a = a.replace( 5, 3, b ) ; // it's OK for a member
// function to return a new
// value, derived from the old.

Just curious, but what do you think about:
a.substr( 5, 3 ) = b ; // the equivalent of the last
// example, above.

(My pre-standard string class supported this; in fact, it
supported:
a( 5, 3 ) = b ; // same as a = a.replace( 5, 3, b )
But that was designed a very long time ago. I've not given much
thought to the question lately, as to whether I'd do the same
thing today.)
 
S

SG

C++ supports value semantics, and a string class really should
behave as a value.

It just seems we attach a slightly different meaning to the expression
"value semantics" and "mutation".
This means no mutator functions, but does

This is where we differ. I'm not talking about values but objects
that have a state that represents a value. In that respect operator=,
operator+= are mutators because the state of the object is mutated to
reflect a new value. This applies to built-in types, too, of course.
Mutating and value semantics are not mutually exclusive.

I thought this discussion is about a possible implementation of string
class (i.e. sharing reference-counted character arrays that are not
altered -- hence "immutable").

If you say "immutable string" but allow assignment and probably +=
then you have a certain idea of what a "string" is and what the thing
is you use for storing a string value. It doesn't seem to match the
"object can be in a state that represents a string value"-idea.

Do you see where I'm going with this?
[...]  On the other hand,
any possibility of modifying a string without using an = sign is
poor design (because of the lack of identity).

I count "poor design" as opinion. "lack of identidy" needs some more
explaining, especially with respect to how any string mutator would be
any different than an assignment or += for ints, for example. =, +=,
*= ... just have a special syntax. To avoid misunderstandings: By
"string" in "string mutator" I meant the string *object* and not a
string *value*.

I think this is really just a terminology/point-of-view issue w.r.t.
mutation and whether by "string" you mean "string object" or "string
value". So, if I assign a new "string value" to a "string object" I
mutate this object so that it represents the new string value.

Obviously the expression "immutable string" can be confusing. That's
why I raised some concern.


Cheers!
SG
 
A

Alf P. Steinbach

* James Kanze:
* Daniel T.:
[...]
You can change which string value 'a' refers to.
You can not change the string value itself (although 'a' can,
when it can guarantee that it is the one and only reference,
which it easily can).
That is the only practical usage of the word "immutable" for
strings, and it is (not just in fact but also necessarily) the
meaning of "immutable" when we refer to strings in other
languages, or certain C++ string classes, as immutable.

I think I agree with you, but just to be sure we're on the same
wavelength: what you're basically saying is that (supposing all
variables have string type):

a = b ; // fine
a += b ; // also OK
a.replace( 5, 3, b ) ; // no, except that
a = a.replace( 5, 3, b ) ; // it's OK for a member
// function to return a new
// value, derived from the old.
Yes.


Just curious, but what do you think about:
a.substr( 5, 3 ) = b ; // the equivalent of the last
// example, above.

It can improve efficiency to have it, when it's done repeatedly.

However, I think it can be much more clear to /explicitly/ obtain a mutable
string or string buffer for that.

Then there's no surprise about the O(n) (for n length of a) lurking in there;
it's then clear that it may occur in explicitly obtaining the mutable string /
buffer.


Cheers,

- Alf
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,770
Messages
2,569,583
Members
45,074
Latest member
StanleyFra

Latest Threads

Top