Help with program crashing...

Discussion in 'C Programming' started by santosh, Jan 19, 2006.

  1. santosh

    santosh Guest

    Hello all,

    I've put together a small program to count the number of characters and
    'words' in a text file. The minimum length of a word, (in terms of no.
    of characters), as well as word delimiting characters can be specified
    on the command line. The default delimiting characters built into the
    program are space, newline, tab, carriage return, form feed, vertical
    tab, comma and null. If a 'u' or 'U' is specified as the last command
    line argument, this default set is ignored and the characters in the
    penultimate command line argument are used.

    Now, the problem is that the program is terminated by the OS when the
    'minimum word length' argument, (the second command line argument after
    the filename), exceeds a certain value which seems to vary from file to
    file. As far as I can figure it out, the word counting code, in
    function words() shouldn't simply crash.

    Can anyone spot any logical mistake or other dubious calculation which
    might cause this behaviour?

    I compiled it with gcc -Wall -ansi -pedantic and there were three
    warnings, none of which seem to me to be able to affect the code.

    Thanks in advance.

    The code follows:

    /*
    * Usage = words filename [mwl] [delchars] [uddc]
    * Options in square brackets are optional.
    * filename - Path name of file to be scanned.
    * mwl - Specifies the minimum length, (in characters), a word must
    have
    * to be counted as such.
    * delchars - One or more characters which will be added to the set of

    * default word delimiting characters unless 'uddc' is specified
    * as 'u'/'U', in which case, the default delimiters will be
    * ignored.
    *
    * Default 'mwl' - 1 character.
    * Default 'delchars' - space, tab, newline, carriage return, form
    feed,
    * vertical tab
    */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <ctype.h>

    /* A pointer to this structure is passed to the 'words()' function */
    struct words_args {
    FILE *fp; /* Pointer to a file opened in text mode for reading */
    size_t mwl; /* Minimum length (in chars) a 'word' must have */
    char *delchars; /* Null terminated array of custom delimiting chars */
    char uddc; /* If 'u' or 'U', ignores default delimiting chars */
    size_t *nwords; /* Upon return: No. of words in given file */
    size_t *tchars; /* Upon return: Total no. of characters in file */
    size_t *wchars; /* Upon return: Total chars making up 'words' */
    size_t *awl; /* Upon return: Average length of words (in chars) */
    };

    int words( struct words_args *args );
    /* Returns true if was file opened successfully */
    unsigned short int words_open_file
    ( char *clarg_filename, const char *mode, struct words_args *pf );
    /* Returns true if 'mwl' command line parameter is valid and
    convertable */
    unsigned short int words_conv_mwl( char *clarg_mwl, struct words_args
    *pm );
    /* Returns true if 'uddc' command line parameter is valid */
    unsigned short int words_check_uddc( char *clarg_uddc, struct
    words_args *pu );
    /* Returns true if file was closed successfully */
    unsigned short int words_close_file( FILE *fp );
    /* Prints to file stream, the values returned by 'words()' */
    void words_print_results( char *fn, struct words_args *ps, FILE *stream
    );

    void words_print_results( char *fn, struct words_args *ps, FILE *stream
    ) {
    fprintf( stream, "\n\nFile: %s\n\tTotal characters = %u\n\tTotal "
    "characters making up 'words' = %u\n\tTotal words = %u\n\t"
    "Average word length, (in characters) = %u\nFile size = "
    "%u characters x %u bytes per character = %u bytes.\n", fn,
    *ps->tchars, *ps->wchars, *ps->nwords, *ps->awl, *ps->tchars,
    sizeof(char), ((*ps->tchars) * sizeof(char)) );
    return;
    }

    unsigned short int words_open_file
    ( char *clarg_filename, const char *mode, struct words_args *pf ) {
    if( (pf->fp = fopen(clarg_filename, mode)) == NULL ) {
    fprintf( stderr, "\nFile: %s\n\tOpen failed. (Mode "
    "'%s').\n", clarg_filename, mode );
    return 0;
    }
    else
    return 1;
    }

    unsigned short int words_close_file( FILE *fp ) {
    int rv;

    rv = fclose(fp);
    if( rv == 0 )
    return 1;
    if( rv == EOF ) {
    fprintf( stderr, "\nwords_close_file(): Attempt to close file"
    " failed.\n" );
    return 0;
    }
    else {
    fprintf( stderr, "\nwords_close_file(): Attempt to close file"
    " failed.\n" );
    return 0;
    }
    }

    unsigned short int words_conv_mwl( char *clarg_mwl, struct words_args
    *pm ) {
    int sscanf_rv;
    char *str = NULL;

    /* Check if string contains only digit characters */
    for(str = clarg_mwl; *str != '\0'; ++str) {
    /* If not space and digit char signal error */
    if( !isdigit((int) *str) && !isspace((int) *str) ){
    fprintf( stderr, "\nwords_conv_mwl(): Bad parameter"
    ".\n\tParameter is: %s\n.", clarg_mwl );
    return 0;
    }
    }

    /* Attempt to convert string and assign to structure member */
    sscanf_rv = sscanf(clarg_mwl, "%u", &pm->mwl);
    if( (sscanf_rv == 0) || (sscanf_rv == EOF) ) {
    fprintf( stderr, "\nwords_conv_mwl(): sscanf() returned 0 or"
    " EOF.\n\tParameter is: %s\n", clarg_mwl );
    return 0;
    }
    else {
    if( sscanf_rv == 1 )
    return 1;
    else {
    fprintf( stderr, "\nwords_conv_mwl(): sscanf() "
    "returned %d.\n\tParameter is: %s\n.",
    sscanf_rv, clarg_mwl );
    return 0;
    }
    }
    }

    unsigned short int words_check_uddc( char *clarg_uddc, struct
    words_args *pu ) {
    char *cla_uddc = clarg_uddc, ch;
    size_t ctr;

    for(ctr = 0; *(cla_uddc + ctr) != '\0'; ++ctr) {
    if( isspace((int) *(cla_uddc + ctr)) )
    continue;
    else {
    ch = *(cla_uddc + ctr);
    if( (ch == 'u') || (ch == 'U') ) {
    pu->uddc = 'u';
    return 1;
    }
    else {
    fprintf( stderr, "\nwords_check_uddc(): "
    "Invalid parameter\n\t: %s\n",
    clarg_uddc );
    return 0;
    }
    }
    }
    return 0;
    }



    int main( int argc, char *argv[] ) {
    int words_rv = 0;
    struct words_args args, *pargs = NULL, *pmemb = NULL;
    size_t no_words = 0, no_tchars = 0, no_wchars = 0, avgwl = 0;
    const char *words_usage = "\nwords - Data about words and characters "
    "in a text file.\n\nUsage - words filename [mwl] [delchars] "
    "[uddc]\nOptions within square brackets are optional.\n\n"
    "filename - Relative or absolute path name of file to be "
    "examined.\nmwl - Minimum length, (in characters), a 'word' "
    "must have to be counted.\ndelchars - One or more characters "
    "to be added to the default set of\nword delimiting characters"
    ".\nuddc - If this is 'u' or 'U', the default, builtin set of"
    " delimiting characters\nwill be ignored, and the one "
    "specified on the command line will be used.\n";

    if( argc < 2 ) {
    fprintf( stderr, words_usage ); /* Print usage message */
    exit(EXIT_FAILURE);
    }

    pargs = &args; /* Initialise pointer to struct and member pointers */
    args.nwords = &no_words;
    args.tchars = &no_tchars;
    args.wchars = &no_wchars;
    args.awl = &avgwl;

    if( argc == 2 ) { /* Only 'filename' given */
    pmemb = &args;
    if( !words_open_file(argv[1], "rt", pmemb) )
    exit(EXIT_FAILURE);
    else {
    args.mwl = 1; /* Default value */
    args.delchars = NULL; /* Default value */
    args.uddc = 0; /* Default value */

    words_rv = words( pargs );

    if( words_rv == 1 ) {
    words_print_results( argv[1], pargs, stdout);
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    else {
    fprintf( stderr, "\nwords() returned %d.\n",
    words_rv );
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    }
    }

    if( argc == 3 ) { /* 'filename' and 'mwl' given */
    pmemb = &args;
    if( !words_open_file(argv[1], "rt", pmemb) )
    exit(EXIT_FAILURE);
    if( !words_conv_mwl(argv[2], pmemb) )
    exit(EXIT_FAILURE);

    args.delchars = NULL; /* Default value */
    args.uddc = 0; /* Default value */

    words_rv = words( pargs );

    if( words_rv == 1 ) {
    words_print_results( argv[1], pargs, stdout);
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    else {
    fprintf( stderr, "\nwords() returned %d.\n",
    words_rv );
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    }

    if( argc == 4 ) { /* 'filename', 'mwl' and 'delchars' are specified */
    pmemb = &args;
    if( !words_open_file(argv[1], "rt", pmemb) )
    exit(EXIT_FAILURE);
    if( !words_conv_mwl(argv[2], pmemb) )
    exit(EXIT_FAILURE);

    /* Assign 'delchars' CL argument string to corresp. pointer */
    args.delchars = argv[3];
    args.uddc = 0; /* Default value */

    words_rv = words( pargs );

    if( words_rv == 1 ) {
    words_print_results( argv[1], pargs, stdout);
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    else {
    fprintf( stderr, "\nwords() returned %d.\n",
    words_rv );
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    }

    if( argc == 5 ) { /* 'filename', 'mwl', 'delchars' & 'uddc' given */
    pmemb = &args;
    if( !words_open_file(argv[1], "rt", pmemb) )
    exit(EXIT_FAILURE);
    if( !words_conv_mwl(argv[2], pmemb) )
    exit(EXIT_FAILURE);
    /* Assign 'delchars' CL argument string to corresp. pointer */
    args.delchars = argv[3];
    /* Verify the last, i.e. the 'uddc', CL argument */
    if( !words_check_uddc(argv[4], pmemb) )
    exit(EXIT_FAILURE);

    words_rv = words( pargs );

    if( words_rv == 1 ) {
    words_print_results( argv[1], pargs, stdout);
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    else {
    fprintf( stderr, "\nwords() returned %d.\n",
    words_rv );
    if( !words_close_file( pargs->fp ) )
    exit(EXIT_FAILURE);
    else
    exit(EXIT_SUCCESS);
    }
    }

    /* Execution will reach here only if 'argc' and thus the command line
    * contains excess arguments. 'argc' should be at most 5. For now we
    * print a message and exit. Later we can modify the above if
    statement
    * to accept 5 arguments and ignore the rest.
    */
    if( argc > 5 ) {
    fprintf( stderr, "\nWarning: Only four command line arguments "
    "are supported.\n" );
    fprintf( stderr, words_usage );
    exit(EXIT_FAILURE);
    }
    }
    /*
    ----------------------------------------------------------------------------
    * END OF main()
    *
    ----------------------------------------------------------------------------
    */

    int words( struct words_args *args ) {
    int nc;
    size_t dss = 0, cwl = 0;
    const char *ddcs = " \n\t\r\f\v,"; /* Default delimiting characters */
    char *ds = NULL;

    /* Arguments verification */
    if( (args->delchars == NULL) && (args->uddc != 0) )
    return 0;
    if( args->fp == NULL )
    return 0;
    if( (args->nwords == NULL) || (args->tchars == NULL) )
    return 0;
    if( (args->wchars == NULL) || (args->awl == NULL) )
    return 0;

    /* If the custom delimiters string is not null, we verify that it ends

    * with a null character, and if so, based on whether 'args->uddc' is
    * 'U'/'u' or 0, we set the delimiters string 'ds' to point to the
    * custom delimiters string exclusively in the former case or allocate
    * space on the heap and by means of strcpy() and strcat(), build a
    * unified delimiters string, consisting of both the default
    delimiters
    * 'ddcs' and custom delimiters, in the latter case.
    */
    if( args->delchars != NULL ) {
    if( strchr(args->delchars, '\0') != NULL ) {
    if( args->uddc == 0 ) {
    dss = (strlen(ddcs)+strlen(args->delchars)+1);
    ds = malloc(dss);
    if( ds == NULL )
    return 0;
    else {
    if( strcpy(ds, ddcs) != ds )
    return 0;
    if( strcat(ds, args->delchars) != ds )
    return 0;
    }
    }
    else {
    if((args->uddc == 'u') || (args->uddc == 'U'))
    ds = args->delchars;
    else
    return 0;
    }
    }
    else
    return 0;
    }
    else {
    if( args->delchars == NULL )
    ds = ddcs;
    else
    return 0;
    }

    /* If 'mwl' is zero, replace with one. */
    if( args->mwl == 0 )
    args->mwl = 1;

    /* The actual word counting code... */
    while( (nc = fgetc(args->fp)) != EOF ) {
    if( strchr(ds, nc) != NULL ) {
    if( cwl >= args->mwl ) {
    ++(*args->nwords); /* Increment word count */
    ++(*args->tchars); /* Inc. total char count */
    cwl = 0; /* Reset current word length count */
    continue;
    }
    else {
    ++(*args->tchars);
    cwl = 0;
    continue;
    }
    }
    else {
    ++cwl; /* Increment the current world length counter */
    ++(*args->tchars); /* Increment total char count */
    ++(*args->wchars); /* Inc. the words chars count */
    continue;
    }
    }

    /* File read returned EOF. If it's file's end, then calculate
    * average word length and return successfully. If it's due to a read
    * error or otherwise, return failure.
    */
    if( feof(args->fp) ) {
    /* Average word length is total characters making up words
    * divided by the number of words
    */
    *args->awl = *args->wchars / *args->nwords;
    return 1;
    }
    else {
    if( ferror(args->fp) )
    return 0;
    else
    return 0;
    }
    }
    /* ------------------------------------ */
    /* END OF words.c */
    /* ------------------------------------ */
     
    santosh, Jan 19, 2006
    #1
    1. Advertising

  2. santosh

    boa Guest

    santosh wrote:
    > Hello all,
    >
    > I've put together a small program to count the number of characters and
    > 'words' in a text file. The minimum length of a word, (in terms of no.
    > of characters), as well as word delimiting characters can be specified
    > on the command line. The default delimiting characters built into the
    > program are space, newline, tab, carriage return, form feed, vertical
    > tab, comma and null. If a 'u' or 'U' is specified as the last command
    > line argument, this default set is ignored and the characters in the
    > penultimate command line argument are used.
    >
    > Now, the problem is that the program is terminated by the OS when the
    > 'minimum word length' argument, (the second command line argument after
    > the filename), exceeds a certain value which seems to vary from file to
    > file. As far as I can figure it out, the word counting code, in
    > function words() shouldn't simply crash.
    >
    > Can anyone spot any logical mistake or other dubious calculation which
    > might cause this behaviour?


    If 'minimum word length' is set very high, higher than the max length of
    any word in the input, you get a divide by zero in words(), approx at
    line 411.

    /* Average word length is total characters making up words
    * divided by the number of words
    */
    *args->awl = *args->wchars / *args->nwords;


    HTH
    boa
     
    boa, Jan 19, 2006
    #2
    1. Advertising

  3. santosh said:

    > Can anyone spot any logical mistake or other dubious calculation which
    > might cause this behaviour?


    *args->awl = *args->wchars / *args->nwords;

    *args->nwords is 0. Oops.

    Your program is very complicated, considering its simple task. Why, for
    example, are these struct members pointers?

    size_t *nwords;
    size_t *tchars;
    size_t *wchars;
    size_t *awl;

    --
    Richard Heathfield
    "Usenet is a strange place" - dmr 29/7/1999
    http://www.cpax.org.uk
    email: rjh at above domain (but drop the www, obviously)
     
    Richard Heathfield, Jan 19, 2006
    #3
  4. santosh

    santosh Guest

    Richard Heathfield wrote:
    > santosh said:
    >
    > > Can anyone spot any logical mistake or other dubious calculation which
    > > might cause this behaviour?

    >
    > *args->awl = *args->wchars / *args->nwords;
    >
    > *args->nwords is 0. Oops.


    Yes, should've spotted that. Thanks.

    > Your program is very complicated, considering its simple task. Why, for
    > example, are these struct members pointers?
    >
    > size_t *nwords;
    > size_t *tchars;
    > size_t *wchars;
    > size_t *awl;


    Well, originally I planned on passing the structure by value with the
    above pointers pointing to the appropriate variables in the caller, but
    later changed words() to accept a pointer to struct instead and forgot
    to change the above pointers to variables. I will do that now.

    Most of the code in the program is error checking at every turn,
    otherwise bad CL arguments will cause further misbehaviour. The actual
    word counting function seems simple enough to me, though as you point
    out above, I can avoid the constant indirections.
     
    santosh, Jan 19, 2006
    #4
  5. santosh said:

    > Well, originally I planned on passing the structure by value with the
    > above pointers pointing to the appropriate variables in the caller,


    Just as a rule of thumb, it's generally (i.e. practically always!) best to
    pass the address of the structure (as you have now chosen to do). If you
    don't want the called function to modify the structure whose address is
    passed, pass it as const struct T *p rather than struct T *p.


    --
    Richard Heathfield
    "Usenet is a strange place" - dmr 29/7/1999
    http://www.cpax.org.uk
    email: rjh at above domain (but drop the www, obviously)
     
    Richard Heathfield, Jan 19, 2006
    #5
  6. santosh

    santosh Guest

    Richard Heathfield wrote:
    > Just as a rule of thumb, it's generally (i.e. practically always!) best to
    > pass the address of the structure (as you have now chosen to do).


    Yes, I guess passing a copy of a structure is required far less often
    than by reference.

    > If you don't want the called function to modify the structure whose address is
    > passed, pass it as const struct T *p rather than struct T *p.


    Okay, but the callee can make a copy of the const struct T *p and use
    that.

    An array name is also a const pointer, but modification is allowed in
    that case...
     
    santosh, Jan 20, 2006
    #6
  7. santosh said:

    > Richard Heathfield wrote:
    >> If you don't want the called function to modify the structure whose
    >> address is passed, pass it as const struct T *p rather than struct T *p.

    >
    > Okay, but the callee can make a copy of the const struct T *p and use
    > that.


    Sure, but adding const ensures that the callee can't change the struct's
    contents itself.

    > An array name is also a const pointer,


    No, it isn't.

    --
    Richard Heathfield
    "Usenet is a strange place" - dmr 29/7/1999
    http://www.cpax.org.uk
    email: rjh at above domain (but drop the www, obviously)
     
    Richard Heathfield, Jan 20, 2006
    #7
  8. santosh

    santosh Guest

    Richard Heathfield wrote:
    > santosh said:
    > > An array name is also a const pointer,

    >
    > No, it isn't.


    Sorry about that. I'm still quite new to C.
    I should have said it's a pointer constant, not a const pointer.
     
    santosh, Jan 20, 2006
    #8
  9. santosh said:

    > Richard Heathfield wrote:
    >> santosh said:
    >> > An array name is also a const pointer,

    >>
    >> No, it isn't.

    >
    > Sorry about that. I'm still quite new to C.
    > I should have said it's a pointer constant, not a const pointer.


    It isn't even a pointer constant. It's an array name. When used in a value
    context, however, we must apply what Chris Torek calls "The Rule":

    A = *(A + i) by definition, from the Standard

    => &A = &*(A + i)

    => &A = (A + i)

    => &A[0] = (A + 0)

    => &A[0] = A

    (i.e. the name of an array, used in a value context, decays to a pointer to
    the array's first element).

    If this is what you meant, you are correct. Otherwise, you are not correct.

    --
    Richard Heathfield
    "Usenet is a strange place" - dmr 29/7/1999
    http://www.cpax.org.uk
    email: rjh at above domain (but drop the www, obviously)
     
    Richard Heathfield, Jan 20, 2006
    #9
  10. santosh

    Chris Torek Guest

    >>santosh said:
    >>> An array name is also a const pointer,


    >Richard Heathfield wrote:
    >> No, it isn't.


    In article <>
    santosh <> writes:
    >Sorry about that. I'm still quite new to C.
    >I should have said it's a pointer constant, not a const pointer.


    Even this is not quite right, for two reasons. One is somewhat
    minor, but still significant: it is not (or at least not necessarily)
    a *constant*:

    #include <stdio.h>
    void f(void) {
    char a[10];
    printf("%p\n", (void *)a);
    }
    void g(void) {
    f();
    }
    int main(void) {
    f();
    g();
    return 0;
    }

    This will often print two different numbers (although some systems
    will produce just one, in which case "a" does appear to be a constant
    after all). Note, however, that if we make "a" static, it is quite
    likely to produce the same number for both printf()s (since the
    output from %p is implementation defined, we cannot predict with
    100% certainty that it *will* be the same number, though).

    The more important reason not to say that an array "is" a pointer
    is because an array is *not* a pointer. An array is an array, and
    a pointer is a pointer. An array name used where a value is needed
    *becomes* a pointer, but this pointer is *computed* (at compile
    and/or run time as appropriate) by the compiler, in exactly the
    same way that the address of any other object is computed.

    See also <http://c-faq.com/aryptr/index.html> (all of section 6!).
    --
    In-Real-Life: Chris Torek, Wind River Systems
    Salt Lake City, UT, USA (40°39.22'N, 111°50.29'W) +1 801 277 2603
    email: forget about it http://web.torek.net/torek/index.html
    Reading email is like searching for food in the garbage, thanks to spammers.
     
    Chris Torek, Jan 20, 2006
    #10
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Daragoth
    Replies:
    4
    Views:
    422
    Dave Vandervies
    Aug 26, 2004
  2. Shark1
    Replies:
    3
    Views:
    396
    gooch
    Jan 20, 2005
  3. why this program is not crashing

    , Feb 11, 2005, in forum: C Programming
    Replies:
    20
    Views:
    683
    Dave Thompson
    Feb 21, 2005
  4. James

    why this program is crashing

    James, Apr 17, 2005, in forum: C Programming
    Replies:
    6
    Views:
    308
    Barry Schwarz
    Apr 18, 2005
  5. Replies:
    4
    Views:
    300
    Barry Schwarz
    Dec 2, 2005
Loading...

Share This Page