Why does strcat mess up the tokens in strtok (and strtok_r)?

D

DFS

Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? And how to fix it?

Thanks


====================================================================================


Output:

[dfs@home files]$ ./strtokens
* using strtok, with no calls to strcat
string before: Look at him working, darning his socks in the night when
there's nooboody there

token: Look
token: at
token: him
token: working,
token: darning
token: his
token: socks
token: in
token: the
token: night
token: when
token: there's
token: nooboody
token: there

string after: Look
Longest word is 8 characters
-----------------------------------------------------------------------------
* using strtok, with calls to strcat
string before: Look at him working, darning his socks in the night when
there's nooboody there

token: Look
token: at
token: him
token: working,
(this token has strlen(8), so strcat is called
and it alters the remaining tokens, including
those in the next call to strtok_r?!?)
token: da
token: h
token: s
token: s
token: c
token: s
token: the
token: ht
token: whe
token: the
token: e's
token: b
token: dy
token: the
token: e

string after: Look
Longest word(s), len 8: working,
-----------------------------------------------------------------------------
* using strtok_r, with calls to strcat
string before: Look at him working, darning his socks in the night when
there's nooboody there

token: L
token: at
token: h
token: m
token: w
token: da
token: h
token: s
token: s
token: c
token: s
token: the
token: ht
token: whe
token: the
token: e's
token: b
token: dy
token: the
token: e

string after: L
Longest word(s), len 8:
-----------------------------------------------------------------------------

Should be 'Longest word(s), len 8: working, nooboody'





====================================================================================


#include <stdio.h>
#include <string.h>


//GET LENGTH OF LONGEST WORD IN STRING
//strcat NOT USED
int getMaxWordLength(char *str, char *delim) {
char *token;
token = strtok(str, delim);
int maxWordLen = (int)strlen(token);

while(token != NULL)
{
printf("token: %s\n", token);
if ((int)strlen(token) > maxWordLen) {
maxWordLen = (int)strlen(token);
}
token = strtok(NULL, delim);
}
return maxWordLen;
}


//GET LIST OF LONGEST WORDS IN STRING.
//USE strtok AND strcat
int getLongestWords(char *str, char *longestWords, int maxWordLen, char
*delim) {
char *token;
token = strtok(str, delim);
while(token != NULL)
{
printf("token: %s\n", token);
if ((int)strlen(token) == maxWordLen) {
strcat(longestWords, token);
strcat(longestWords, " ");
}
token = strtok(NULL, delim);
}
return 0;
}

//GET LIST OF LONGEST WORDS IN STRING.
//USE strtok_r AND strcat
int getLongestWords_r(char *str, char *longestWords, int maxWordLen,
char *delim) {
char *token;
char* sptr;
token = strtok_r(str, delim, &sptr);
while(token != NULL)
{
printf("token: %s\n", token);
if ((int)strlen(token) == maxWordLen) {
strcat(longestWords, token);
strcat(longestWords, " ");
}
token = strtok_r(NULL, delim, &sptr );
}
return 0;
}



int main(void)
{

char splitStr[200] = "Look at him working, darning his socks in the
night when there's nooboody there";
char splitStrA[200] = "";
char splitStrB[200] = "";
char splitStrC[200] = "";
char delim[2] = " ";
char longestWordsOne[] = "";
char longestWordsTwo[] = "";

//MAKE COPIES OF STRING FOR USE WITH CALLS TO strtok AND strtok_r
strcpy(splitStrA, splitStr);
strcpy(splitStrB, splitStr);
strcpy(splitStrC, splitStr);

printf("string before: %s\n", splitStrA);
int i = getMaxWordLength(splitStrA, delim);
printf("string after: %s\n", splitStrA);
printf("Longest word is %d characters\n", i);
printf("-----------------------\n");

printf("string before: %s\n", splitStrB);
getLongestWords(splitStrB, longestWordsOne, i, delim);
printf("string after: %s\n", splitStrB);
printf("Longest word(s), len %d: %s\n", i, longestWordsOne);
printf("-----------------------\n");

printf("string before: %s\n", splitStrC);
getLongestWords_r(splitStrC, longestWordsTwo, i, delim);
printf("string after: %s\n", splitStrC);
printf("Longest word(s), len %d: %s\n", i, longestWordsTwo);
printf("-----------------------\n");


return 0;
}
 
T

Tim Rentsch

DFS said:
Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? And how to fix it?

Thanks


====================================================================================


Output:

[dfs@home files]$ ./strtokens
* using strtok, with no calls to strcat
string before: Look at him working, darning his socks in the night
when there's nooboody there

token: Look
token: at
token: him
token: working,
token: darning
token: his
token: socks
token: in
token: the
token: night
token: when
token: there's
token: nooboody
token: there

string after: Look
Longest word is 8 characters
-----------------------------------------------------------------------------
* using strtok, with calls to strcat
string before: Look at him working, darning his socks in the night
when there's nooboody there

token: Look
token: at
token: him
token: working,
(this token has strlen(8), so strcat is called
and it alters the remaining tokens, including
those in the next call to strtok_r?!?)
token: da
token: h
token: s
token: s
token: c
token: s
token: the
token: ht
token: whe
token: the
token: e's
token: b
token: dy
token: the
token: e

string after: Look
Longest word(s), len 8: working,
-----------------------------------------------------------------------------
* using strtok_r, with calls to strcat
string before: Look at him working, darning his socks in the night
when there's nooboody there

token: L
token: at
token: h
token: m
token: w
token: da
token: h
token: s
token: s
token: c
token: s
token: the
token: ht
token: whe
token: the
token: e's
token: b
token: dy
token: the
token: e

string after: L
Longest word(s), len 8:
-----------------------------------------------------------------------------

Should be 'Longest word(s), len 8: working, nooboody'





====================================================================================


#include <stdio.h>
#include <string.h>


//GET LENGTH OF LONGEST WORD IN STRING
//strcat NOT USED
int getMaxWordLength(char *str, char *delim) {
char *token;
token = strtok(str, delim);
int maxWordLen = (int)strlen(token);

while(token != NULL)
{
printf("token: %s\n", token);
if ((int)strlen(token) > maxWordLen) {
maxWordLen = (int)strlen(token);
}
token = strtok(NULL, delim);
}
return maxWordLen;
}


//GET LIST OF LONGEST WORDS IN STRING.
//USE strtok AND strcat
int getLongestWords(char *str, char *longestWords, int maxWordLen,
char *delim) {
char *token;
token = strtok(str, delim);
while(token != NULL)
{
printf("token: %s\n", token);
if ((int)strlen(token) == maxWordLen) {
strcat(longestWords, token);
strcat(longestWords, " ");
}
token = strtok(NULL, delim);
}
return 0;
}

//GET LIST OF LONGEST WORDS IN STRING.
//USE strtok_r AND strcat
int getLongestWords_r(char *str, char *longestWords, int maxWordLen,
char *delim) {
char *token;
char* sptr;
token = strtok_r(str, delim, &sptr);
while(token != NULL)
{
printf("token: %s\n", token);
if ((int)strlen(token) == maxWordLen) {
strcat(longestWords, token);
strcat(longestWords, " ");
}
token = strtok_r(NULL, delim, &sptr );
}
return 0;
}



int main(void)
{

char splitStr[200] = "Look at him working, darning his socks in the
night when there's nooboody there";
char splitStrA[200] = "";
char splitStrB[200] = "";
char splitStrC[200] = "";
char delim[2] = " ";
char longestWordsOne[] = "";
=========================^^

char longestWordsTwo[] = "";
=========================^^


//MAKE COPIES OF STRING FOR USE WITH CALLS TO strtok AND strtok_r
strcpy(splitStrA, splitStr);
strcpy(splitStrB, splitStr);
strcpy(splitStrC, splitStr);

printf("string before: %s\n", splitStrA);
int i = getMaxWordLength(splitStrA, delim);
printf("string after: %s\n", splitStrA);
printf("Longest word is %d characters\n", i);
printf("-----------------------\n");

printf("string before: %s\n", splitStrB);
getLongestWords(splitStrB, longestWordsOne, i, delim);
printf("string after: %s\n", splitStrB);
printf("Longest word(s), len %d: %s\n", i, longestWordsOne);
printf("-----------------------\n");

printf("string before: %s\n", splitStrC);
getLongestWords_r(splitStrC, longestWordsTwo, i, delim);
printf("string after: %s\n", splitStrC);
printf("Longest word(s), len %d: %s\n", i, longestWordsTwo);
printf("-----------------------\n");


return 0;
}

Do you know what is wrong with the marked lines?
 
J

James Kuyper

Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? ....
char longestWordsOne[] = "";
char longestWordsTwo[] = "";

Those declarations are exactly equivalent to

char longestWordsOne[1] = { '\0' };

Because those arrays are only one byte long, the minute you copy any
non-empty strings into one of those arrays, you're overwriting the end
of the array, rendering the behavior of your entire program undefined.
It's quite likely that what you're overwriting is other variables in
your program, possibly including the source string for your strtok() call.

You need to either give those arrays a fixed length that is long enough
to hold everything that you will be writing into them, or you need to
change them into pointers to dynamically allocated memory that you
periodically realloc(), as needed, to keep it long enough. Make sure
that you realloc() BEFORE overwriting the end of the buffer, not afterwards.

When following the realloc() approach, I generally double the size of
the buffer every time it fills up, so I don't have to realloc() very
often. If you're worried about wasting space, when the loop is complete,
realloc() back to the currently used length.

If the number of words you're keeping track of is long enough, I'd
recommend keeping track of the current end of the buffer yourself, and
using strcpy(), rather than wasting time having strcat() search for the
end of the buffer.
 
K

Kaz Kylheku

Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? And how to fix it?

strcat is a destructive operation; it changes memory in-place.
It assumes that a string has space after it, and extends the string
into that space.

strtok is also destructive; it works by sticking null terminators into
the original string and returning pointers into that space.

The tokens returned by strtok do not have space after them for strcat,
except perhaps the very last token.
 
P

Phil Carmody

DFS said:
Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? And how to fix it?

Thanks


====================================================================================


Output:

[dfs@home files]$ ./strtokens
* using strtok, with no calls to strcat
string before: Look at him working, darning his socks in the night
when there's nooboody there

token: Look
token: at
token: him
token: working,
token: darning
token: his
token: socks
token: in
token: the
token: night
token: when
token: there's
token: nooboody
token: there

string after: Look
Longest word is 8 characters
-----------------------------------------------------------------------------
* using strtok, with calls to strcat
string before: Look at him working, darning his socks in the night
when there's nooboody there

token: Look
token: at
token: him
token: working,
(this token has strlen(8), so strcat is called
and it alters the remaining tokens, including
those in the next call to strtok_r?!?)
token: da

Looks like your delimiter's changed. Print out the
delimiter to check its value. Print out the addresses
of all those strings too.

[SNIP - code that overwrites delim]

Phil
 
D

DFS

Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? ...
char longestWordsOne[] = "";
char longestWordsTwo[] = "";

Those declarations are exactly equivalent to

char longestWordsOne[1] = { '\0' };

Because those arrays are only one byte long, the minute you copy any
non-empty strings into one of those arrays, you're overwriting the end
of the array, rendering the behavior of your entire program undefined.
It's quite likely that what you're overwriting is other variables in
your program,

I'll try to verify that might be happening.


possibly including the source string for your strtok() call.

strtok (and strtok_r) does that all by its lonesome:

1. string before: "Look at him working, darning his socks in the night
when there's nooboody there"

2. calls to strtok (even without using my invalid arrays)

3. string after: "Look"


You need to either give those arrays a fixed length that is long enough
to hold everything that you will be writing into them, or you need to
change them into pointers to dynamically allocated memory that you
periodically realloc(), as needed, to keep it long enough.
Make sure that you realloc() BEFORE overwriting the end of the buffer, not afterwards.


If you saw my function spec, I do pass the arry pointer to them:

int getLongestWords(char *str, char *longestWords, int maxWordLen, char
*delim)


I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters. Is that because I passed the array
as a pointer and it handled the memory automagically?


When following the realloc() approach, I generally double the size of
the buffer every time it fills up, so I don't have to realloc() very
often. If you're worried about wasting space, when the loop is complete,
realloc() back to the currently used length.

If the number of words you're keeping track of is long enough, I'd
recommend keeping track of the current end of the buffer yourself, and
using strcpy(), rather than wasting time having strcat() search for the
end of the buffer.

I'll look into realloc() approach. I'm new to C so I don't know
how/when/where to do it just yet.


Thanks for your reply!
 
D

DFS

I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.
 
J

James Kuyper

Using strcat in the middle of looping thru the tokens actually changes
the tokens occurring after the point at which strcat is called.

Anybody know why? ...
char longestWordsOne[] = "";
char longestWordsTwo[] = "";

Those declarations are exactly equivalent to

char longestWordsOne[1] = { '\0' };

Because those arrays are only one byte long, the minute you copy any
non-empty strings into one of those arrays, you're overwriting the end
of the array, rendering the behavior of your entire program undefined.
It's quite likely that what you're overwriting is other variables in
your program,

I'll try to verify that might be happening.

You're certainly overwriting something; the only question is, what are
you overwriting. There's no guaranteed answer to that, the compiler is
free to allocate the memory for that array anywhere it wants to.
strtok (and strtok_r) does that all by its lonesome:

1. string before: "Look at him working, darning his socks in the night
when there's nooboody there"

2. calls to strtok (even without using my invalid arrays)

3. string after: "Look"

I hope you understand that strtok() is supposed to do that! If you don't
understand why, re-read the description of what strtok() does. If you're
still puzzled, ask me - I can explain.
If you saw my function spec, I do pass the arry pointer to them:

Yes, but it's a pointer to (the first element of) an array of length 1,
just barely long enough to store an empty string.
int getLongestWords(char *str, char *longestWords, int maxWordLen, char
*delim)


I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). ...

That doesn't help much. That array is only long enough to store a string
of length 1, such as "a".

.... The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters. Is that because I passed the array
as a pointer and it handled the memory automagically?

No, it's probably because changing the length changed the position in
memory where that array was allocated. As a result, when your program
wrote past the end of the array, it overwrote something different. If
the behavior of your program seemed correct, then the memory it
overwrote was apparently not in use for any purpose that mattered to the
execution of your program.

In other words, the fact that your program worked was sheer luck. And
it's bad luck, not good luck. You've written a defective program that
accidentally works, leaving you unaware of the fact that it's defective.
Your program needs to allocate enough memory to store the longest string
you might want to store there. Figuring out how much is "enough" can be
a complicated issue, but it's one you need to address.
I'll look into realloc() approach. I'm new to C so I don't know
how/when/where to do it just yet.

If you're so new to C that you've never used any member of the malloc()
family before, don't attempt the dynamically allocate memory approach.
Just make the array big enough, and worry about using a more
sophisticated approach once you've learned to use malloc().
 
K

Keith Thompson

DFS said:
I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.

Your program's behavior became undefined as soon as it stored to the
thrid byte of longestWordsOne (that's longestWordsOne[2]). If it didn't
blow up until after you stored 18 characters, that's just bad luck.
Yes, I said *bad* luck; it's better to detect errors sooner rather than
later.
 
B

Ben Bacarisse

DFS said:
I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.

If you have access to a program called valgrind, get it and try it.
 
D

DFS

DFS said:
I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.

If you have access to a program called valgrind, get it and try it.


below was run with char longestWords[2] = "";


int getLongestWords(char *str, char *longestWords, int maxWordLen, char
*delim) {
char *token;
token = strtok(str, delim);
while(token != NULL)
{
if ((int)strlen(token) == maxWordLen) {
strcat(longestWords, token);
strcat(longestWords, " ");
}
token = strtok(NULL, delim);
}
return 0;
}

==============================================================================

[dfs@home files]$ valgrind -v ./strtokens
==1532== Memcheck, a memory error detector
==1532== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==1532== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==1532== Command: ./strtokens
==1532==
--1532-- Valgrind options:
--1532-- -v
--1532-- Contents of /proc/version:
--1532-- Linux version 3.10.40-1-MANJARO (nobody@korrode) (gcc version
4.9.0 20140507 (prerelease) (GCC) ) #1 SMP Tue May 20 15:44:15 EST 2014
--1532-- Arch and hwcaps: AMD64, amd64-cx16-rdtscp-sse3
--1532-- Page sizes: currently 4096, max supported 4096
--1532-- Valgrind library directory: /usr/lib/valgrind
--1532-- Reading syms from /home/dfs/C/files/strtokens
--1532-- Reading syms from /usr/lib/ld-2.19.so
--1532-- Reading syms from /usr/lib/valgrind/memcheck-amd64-linux
--1532-- object doesn't have a symbol table
--1532-- object doesn't have a dynamic symbol table
--1532-- Scheduler: using generic scheduler lock implementation.
--1532-- Reading suppressions file: /usr/lib/valgrind/default.supp
==1532== embedded gdbserver: reading from
/tmp/vgdb-pipe-from-vgdb-to-1532-by-dfs-on-???
==1532== embedded gdbserver: writing to
/tmp/vgdb-pipe-to-vgdb-from-1532-by-dfs-on-???
==1532== embedded gdbserver: shared mem
/tmp/vgdb-pipe-shared-mem-vgdb-1532-by-dfs-on-???
==1532==
==1532== TO CONTROL THIS PROCESS USING vgdb (which you probably
==1532== don't want to do, unless you know exactly what you're doing,
==1532== or are doing some strange experiment):
==1532== /usr/lib/valgrind/../../bin/vgdb --pid=1532 ...command...
==1532==
==1532== TO DEBUG THIS PROCESS USING GDB: start GDB like this
==1532== /path/to/gdb ./strtokens
==1532== and then give GDB the following command
==1532== target remote | /usr/lib/valgrind/../../bin/vgdb --pid=1532
==1532== --pid is optional if only one valgrind process is running
==1532==
--1532-- REDIR: 0x4018260 (strlen) redirected to 0x380673f1 (???)
--1532-- Reading syms from /usr/lib/valgrind/vgpreload_core-amd64-linux.so
--1532-- object doesn't have a symbol table
--1532-- Reading syms from
/usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so
--1532-- object doesn't have a symbol table
--1532-- REDIR: 0x4018010 (index) redirected to 0x4c2b7f0 (index)
--1532-- REDIR: 0x4018230 (strcmp) redirected to 0x4c2c8d0 (strcmp)
--1532-- Reading syms from /usr/lib/libc-2.19.so
--1532-- REDIR: 0x4eb9e30 (strcasecmp) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--1532-- REDIR: 0x4ebc120 (strncasecmp) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--1532-- REDIR: 0x4eb9600 (memcpy@GLIBC_2.2.5) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--1532-- REDIR: 0x4eb79e0 (rindex) redirected to 0x4c2b5e0 (rindex)
--1532-- REDIR: 0x4eb5710 (strcpy) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--1532-- REDIR: 0x4ec93f0 (__strcpy_sse2_unaligned) redirected to
0x4c2bbc0 (strcpy)
--1532-- REDIR: 0x4ec07d0 (strchrnul) redirected to 0x4c2e5c0 (strchrnul)
String: abcdef abcdef abcdef abcdef abcdef abcdef abcdef abcdef abcdef
abcdef abcdef abcdef abcdef abcdef abcdef abcdef abcdef abcdef abcdef
--1532-- REDIR: 0x4eb5ce0 (strlen) redirected to 0x4c2bb80 (strlen)
--1532-- REDIR: 0x4eb3e30 (strcat) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--1532-- REDIR: 0x4ecc210 (__strcat_sse2_unaligned) redirected to
0x4c2b830 (strcat)
Longest word(s) are 6 characters: abcdef abcdef abcdef
-----------------------
--1532-- REDIR: 0x4eb06b0 (free) redirected to 0x4c29930 (free)
==1532==
==1532== HEAP SUMMARY:
==1532== in use at exit: 0 bytes in 0 blocks
==1532== total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==1532==
==1532== All heap blocks were freed -- no leaks are possible
==1532==
==1532== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
--1532--
--1532-- used_suppression: 1 dl-hack3-cond-1
/usr/lib/valgrind/default.supp:1196
==1532==
==1532== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)

==============================================================================


It reports 0 errors, but I have a feeling some of those REDIRs are
capturing problems.
 
S

Stephen Sprunk

I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.

That's because you only allocated space for 2 characters; that it worked
for anything longer than that is a minor miracle.

S
 
J

Joe Pfeiffer

DFS said:
If you have access to a program called valgrind, get it and try it.


below was run with char longestWords[2] = "";
It reports 0 errors, but I have a feeling some of those REDIRs are
capturing problems.

While the valgrind suggestion was a good one, it doesn't turn out to be
helpful in diagnosing your problem (it's primarily intended for finding
errors in dynamic memory allocation).

Others have you given you what are almost certainly the right answers:

(1) yes, strtok() modifies its input. It replaces the delimiters with
'\0'

(2) you've got a classic buffer overflow. Make your output buffers big
enough to hold your result and see what happens.

And something I don't recall having seen:

(3) even when your buffer is big enough to accomodate your result, other
input can easily break it. You need to modify your code to make
buffer overflows impossible (start by looking up strncat() ).
 
K

Keith Thompson

Stephen Sprunk said:
I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.

That's because you only allocated space for 2 characters; that it worked
for anything longer than that is a minor miracle.

s/a minor miracle/one of the infinitely many possible consequences of
undefined behavior/
 
B

Ben Bacarisse

DFS said:
It reports 0 errors, but I have a feeling some of those REDIRs are
capturing problems.

Maybe. I've never seen that sort of message. If you post the actual
code that was run, I'll try it here.
 
K

Keith Thompson

DFS said:
1. string before: "Look at him working, darning his socks in the night
when there's nooboody there"

2. calls to strtok (even without using my invalid arrays)

3. string after: "Look"

strtok() modifies the array by replacing characters with '\0'. When you
view it as a string, you only see "Look", but the rest of the characters
are still there in the array after the '\0'.
 
D

DFS

Maybe. I've never seen that sort of message. If you post the actual
code that was run, I'll try it here.


[dfs@home files]$ ./strtokens
String: Look at him working, darningg hishersh socks in the night when
there's nobody there
Longest word(s) are 8 characters:
1.working, 2.darningg
-----------------------

should be:
1.working, 2.darningg 3.hishersh


=======================================================================

#include <stdio.h>
#include <string.h>


//GET LENGTH OF LONGEST WORD IN STRING
int getMaxWordLength(char *str, char *delim) {
char *token;
token = strtok(str, delim);
int maxWordLen = (int)strlen(token);

while(token != NULL)
{
if ((int)strlen(token) > maxWordLen) {
maxWordLen = (int)strlen(token);
}
token = strtok(NULL, delim);
}
return maxWordLen;
}



//GET LIST OF LONGEST WORDS IN STRING (DON'T STORE REPEATS).
int getLongestWords(char *str, char *longestWords, int maxWordLen, char
*delim) {
char *token, wordCounter[3];
int i = 1;
token = strtok(str, delim);
while(token != NULL)
{
if ((int)strlen(token) == maxWordLen) {
if(strstr(longestWords, token) == 0) {
sprintf(wordCounter, "%d", i);
strcat(longestWords, wordCounter);
strcat(longestWords, ".");
strcat(longestWords, token);
strcat(longestWords, " ");
i++;
}
}
token = strtok(NULL, delim);
}
return 0;
}



int main(void)
{

char splitStr[200] = "Look at him working, darningg hishersh socks
in the night when there's nobody there";
char splitStrA[200] = "";
char splitStrB[200] = "";
char delim[2] = " ";
char longestWords[2] = "";

//MAKE COPIES OF STRING FOR USE WITH CALLS TO strtok
strcpy(splitStrA, splitStr);
strcpy(splitStrB, splitStr);

//OUTPUT
printf("String: %s\n", splitStrA);
int i = getMaxWordLength(splitStrA, delim);
getLongestWords(splitStrB, longestWords, i, delim);
printf("Longest word(s) are %d characters:\n", i);
printf("%s\n", longestWords);
printf("-----------------------\n");

return 0;
}

=======================================================================
fresh valgrind output for above code


[dfs@home files]$ valgrind -v ./strtokens
==2129== Memcheck, a memory error detector
==2129== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==2129== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==2129== Command: ./strtokens
==2129==
--2129-- Valgrind options:
--2129-- -v
--2129-- Contents of /proc/version:
--2129-- Linux version 3.10.40-1-MANJARO (nobody@korrode) (gcc version
4.9.0 20140507 (prerelease) (GCC) ) #1 SMP Tue May 20 15:44:15 EST 2014
--2129-- Arch and hwcaps: AMD64, amd64-cx16-rdtscp-sse3
--2129-- Page sizes: currently 4096, max supported 4096
--2129-- Valgrind library directory: /usr/lib/valgrind
--2129-- Reading syms from /home/dfs/C/files/strtokens
--2129-- Reading syms from /usr/lib/ld-2.19.so
--2129-- Reading syms from /usr/lib/valgrind/memcheck-amd64-linux
--2129-- object doesn't have a symbol table
--2129-- object doesn't have a dynamic symbol table
--2129-- Scheduler: using generic scheduler lock implementation.
--2129-- Reading suppressions file: /usr/lib/valgrind/default.supp
==2129== embedded gdbserver: reading from
/tmp/vgdb-pipe-from-vgdb-to-2129-by-dfs-on-???
==2129== embedded gdbserver: writing to
/tmp/vgdb-pipe-to-vgdb-from-2129-by-dfs-on-???
==2129== embedded gdbserver: shared mem
/tmp/vgdb-pipe-shared-mem-vgdb-2129-by-dfs-on-???
==2129==
==2129== TO CONTROL THIS PROCESS USING vgdb (which you probably
==2129== don't want to do, unless you know exactly what you're doing,
==2129== or are doing some strange experiment):
==2129== /usr/lib/valgrind/../../bin/vgdb --pid=2129 ...command...
==2129==
==2129== TO DEBUG THIS PROCESS USING GDB: start GDB like this
==2129== /path/to/gdb ./strtokens
==2129== and then give GDB the following command
==2129== target remote | /usr/lib/valgrind/../../bin/vgdb --pid=2129
==2129== --pid is optional if only one valgrind process is running
==2129==
--2129-- REDIR: 0x4018260 (strlen) redirected to 0x380673f1 (???)
--2129-- Reading syms from /usr/lib/valgrind/vgpreload_core-amd64-linux.so
--2129-- object doesn't have a symbol table
--2129-- Reading syms from
/usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so
--2129-- object doesn't have a symbol table
--2129-- REDIR: 0x4018010 (index) redirected to 0x4c2b7f0 (index)
--2129-- REDIR: 0x4018230 (strcmp) redirected to 0x4c2c8d0 (strcmp)
--2129-- Reading syms from /usr/lib/libc-2.19.so
--2129-- REDIR: 0x4eb9e30 (strcasecmp) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--2129-- REDIR: 0x4ebc120 (strncasecmp) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--2129-- REDIR: 0x4eb9600 (memcpy@GLIBC_2.2.5) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--2129-- REDIR: 0x4eb79e0 (rindex) redirected to 0x4c2b5e0 (rindex)
--2129-- REDIR: 0x4eb5710 (strcpy) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--2129-- REDIR: 0x4ec93f0 (__strcpy_sse2_unaligned) redirected to
0x4c2bbc0 (strcpy)
--2129-- REDIR: 0x4ec07d0 (strchrnul) redirected to 0x4c2e5c0 (strchrnul)
String: Look at him working, darningg hishersh socks in the night when
there's nobody there
--2129-- REDIR: 0x4eb5ce0 (strlen) redirected to 0x4c2bb80 (strlen)
--2129-- REDIR: 0x4eb8ab0 (strstr) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--2129-- REDIR: 0x4ecda90 (__strstr_sse2_unaligned) redirected to
0x4c2e9d0 (strstr)
--2129-- REDIR: 0x4eb3e30 (strcat) redirected to 0x4a23730
(_vgnU_ifunc_wrapper)
--2129-- REDIR: 0x4ecc210 (__strcat_sse2_unaligned) redirected to
0x4c2b830 (strcat)
Longest word(s) are 8 characters:
1.working, 2.darningg
-----------------------
--2129-- REDIR: 0x4eb06b0 (free) redirected to 0x4c29930 (free)
==2129==
==2129== HEAP SUMMARY:
==2129== in use at exit: 0 bytes in 0 blocks
==2129== total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==2129==
==2129== All heap blocks were freed -- no leaks are possible
==2129==
==2129== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
--2129--
--2129-- used_suppression: 1 dl-hack3-cond-1
/usr/lib/valgrind/default.supp:1196
==2129==
==2129== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)

=======================================================================
 
I

Ian Collins

DFS said:
DFS said:
On 06/11/2014 02:02 PM, DFS wrote:

I found that

char longestWordsOne[2] = "";

took care of the problem (on the surface anyway). The tokens were
unaffected, and it correctly stored the list of the longest words, even
when that list was 100 characters.

I lied. It stopped storing after 18 or so characters.

If you have access to a program called valgrind, get it and try it.


below was run with char longestWords[2] = "";
It reports 0 errors, but I have a feeling some of those REDIRs are
capturing problems.

It's much harder to catch problems with static rather than dynamic
memory use. Unless you stray outside of the stack frame it's tricky for
a tool to check array bounds.

By way of an example, if I change the offending lines in your original
code to

char* longestWordsOne = malloc(1);
char* longestWordsTwo = malloc(1);

dbx reports:

Attempting to read 1 byte at address 0x8061680
which is at start of heap block of size 1 byte at 0x8061680
This block was allocated from:
[1] main() at line 72 in "dfs.c"

stopped in getLongestWords at line 35 in file "dfs.c"
35 strcat(longestWords, token);
 
B

Ben Bacarisse

DFS said:
Maybe. I've never seen that sort of message. If you post the actual
code that was run, I'll try it here.

[dfs@home files]$ ./strtokens
String: Look at him working, darningg hishersh socks in the night when
there's nobody there
Longest word(s) are 8 characters:
1.working, 2.darningg

I get different error, but that's to be expected.
=======================================================================

#include <stdio.h>
#include <string.h>


//GET LENGTH OF LONGEST WORD IN STRING
int getMaxWordLength(char *str, char *delim) {
char *token;
token = strtok(str, delim);
int maxWordLen = (int)strlen(token);

while(token != NULL)
{
if ((int)strlen(token) > maxWordLen) {
maxWordLen = (int)strlen(token);
}
token = strtok(NULL, delim);
}
return maxWordLen;
}

You might consider re-writing this avoid using strtok. By using strtok,
you force the string to be copied (twice) and counting word lengths does
not need to copy the string at all.
//GET LIST OF LONGEST WORDS IN STRING (DON'T STORE REPEATS).
int getLongestWords(char *str, char *longestWords, int maxWordLen,
char *delim) {
char *token, wordCounter[3];
int i = 1;
token = strtok(str, delim);
while(token != NULL)
{
if ((int)strlen(token) == maxWordLen) {
if(strstr(longestWords, token) == 0) {
sprintf(wordCounter, "%d", i);
strcat(longestWords, wordCounter);
strcat(longestWords, ".");
strcat(longestWords, token);
strcat(longestWords, " ");

Why not sprintf(longestWords, "%d. %s ", i, token)? Then consider using
snprintf which can be told how much room is available in the array (it's
up to you to pass the correct size into the function, but that's not
hard.
i++;
}
}
token = strtok(NULL, delim);
}
return 0;
}



int main(void)
{

char splitStr[200] = "Look at him working, darningg hishersh socks
in the night when there's nobody there";
char splitStrA[200] = "";
char splitStrB[200] = "";
char delim[2] = " ";
char longestWords[2] = "";

You have 2 bytes (one taken with a null) and you write the whole output
string to it. That scribbles on goodness know what other objects. Make
this array big enough for the intended use (or pass its size to
getLongestWords and use snprintf).
//MAKE COPIES OF STRING FOR USE WITH CALLS TO strtok
strcpy(splitStrA, splitStr);
strcpy(splitStrB, splitStr);

//OUTPUT
printf("String: %s\n", splitStrA);
int i = getMaxWordLength(splitStrA, delim);
getLongestWords(splitStrB, longestWords, i, delim);
printf("Longest word(s) are %d characters:\n", i);
printf("%s\n", longestWords);
printf("-----------------------\n");

return 0;
}

=======================================================================
fresh valgrind output for above code

As has been said, valgrind turns out not to help much here, but don't
let that stop you using it. It will help out soon enough!
[dfs@home files]$ valgrind -v ./strtokens

(I don't think I've ever used -v. It seems to say too much as far as I
am concerned.)

<snip>
 
J

James Kuyper

s/a minor miracle/one of the infinitely many possible consequences of
undefined behavior/

Despite the infinitely many other possibilities, this particular
possibility is disproportionately high. It is not horribly unlikely, in
a program this simple, for those arrays to have been allocated somewhere
where overwriting the end of the arrays by a small amount has no obvious
effect.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,767
Messages
2,569,572
Members
45,046
Latest member
Gavizuho

Latest Threads

Top