? about reading a comma delimited file

Discussion in 'C Programming' started by Hilary Cotter, Sep 4, 2003.

  1. Thanks for all the help you gave me yesterday.

    here is another question.

    I have a comma delimited file called redirect.txt which looks like
    this

    test, /test.htm
    test 123,/test123.htm

    I am reading these values and processing them, but it seems like the
    way I am doing it is not efficient. I was hoping for pointers on how
    to make this more efficient.

    // testparse.cpp : Defines the entry point for the console application.
    //

    #include "stdafx.h"

    //#include <stdio.h>
    //#include <stdlib.h>
    //#include <string.h>
    //#include <ctype.h>

    /*
     * Reads "in,out" records from c:\Redirect.txt into my_test[] and echoes
     * each record to standard output as "in <in> out <out>".
     *
     * A record is an "in" field terminated by a comma, followed by an "out"
     * field that contains no whitespace. Blanks after the comma are ignored.
     *
     * argc and argv are unused.
     *
     * Returns 0 after processing the file, or 1 if the file cannot be opened.
     */
    int main(int argc, char* argv[])
    {
        struct test
        {
            char in[100];
            char out[100];
        } my_test[150];
        const int max_entries = (int)(sizeof my_test / sizeof my_test[0]);
        FILE *fp;
        int i = 0;

        (void)argc;
        (void)argv;

        fp = fopen("c:\\Redirect.txt", "r");
        if (!fp)
        {
            printf("Can't open test file!\n");
            return 1;
        }

        /*
         * One fscanf call per record, with exactly one argument per conversion:
         *   leading blank  skips whitespace, including the previous newline
         *   %99[^,\n]      reads the "in" field up to the comma; the width
         *                  keeps it inside in[100]
         *   ,              consumes the delimiter
         *   %99s           skips blanks after the comma, then reads the "out"
         *                  field, bounded to fit out[100]
         * Requiring a return value of 2 ends the loop at end of file and also
         * on a malformed record, instead of spinning on a matching failure.
         * The i < max_entries test keeps the writes inside my_test[].
         */
        while (i < max_entries &&
               fscanf(fp, " %99[^,\n],%99s", my_test[i].in, my_test[i].out) == 2)
        {
            printf("in %s out %s\n", my_test[i].in, my_test[i].out);
            ++i;
        }

        fclose(fp);
        return 0;
    }
     
    Hilary Cotter, Sep 4, 2003
    #1
    1. Advertising

  2. (Hilary Cotter) wrote in
    <>:

    >Thanks for all the help you gave me yesterday.
    >
    >here is another question.
    >
    >I have a comma delimited file called redirect.txt which looks like
    >this
    >
    >test, /test.htm
    >test 123,/test123.htm
    >
    >I am reading these values and processing them, but it seems like the
    >way I am doing it is not efficient. I was hoping for pointers on how
    >to make this more efficient.
    >

    <SNIP>
    I will not dwell on the non-standard header file and some minor
    flaws in your code. Instead I'll sketch an algorithm:

    - read the file char-by-char, checking for EOF
    - skip any leading whitespace
    - copy characters to your 1st buffer till you hit ','
    - skip the ',' and any following whitespace
    - copy characters to your 2nd buffer till you hit '\n'
    or whitespace
    - continue till EOF

    And, of course, make sure you're not producing any buffer overflows -
    consider dynamic memory (re)allocation for your buffers.
    Problems with implementing this? Don't hesitate to ask.

    Regards

    Irrwahn
    --
    Sig. Sic.
     
    Irrwahn Grausewitz, Sep 4, 2003
    #2
    1. Advertising

  3. Hilary Cotter

    Al Bowers Guest

    Hilary Cotter wrote:
    > Thanks for all the help you gave me yesterday.
    >
    > here is another question.
    >
    > I have a comma delimited file called redirect.txt which looks like
    > this
    >
    > test, /test.htm
    > test 123,/test123.htm
    >
    > I am reading these values and processing them, but it seems like the
    > way I am doing it is not efficient. I was hoping for pointers on how
    > to make this more efficient.
    >
    > // testparse.cpp : Defines the entry point for the console
    > application.
    > //
    >
    > #include "stdafx.h"
    >
    > //#include <stdio.h>
    > //#include <stdlib.h>
    > //#include <string.h>
    > //#include <ctype.h>
    >
    > int main(int argc, char* argv[])
    > {
    >
    > FILE *fp;
    > int i;
    >
    > struct test
    > {
    > char in[100];
    > char out[100];
    > } my_test [150];
    >
    >
    > fp =fopen("c:\\Redirect.txt", "r");
    > if (!fp)
    > {
    > printf ("Can't open test file!\n");
    > return 1;
    > }
    > i=0;
    >
    > while ((fscanf(fp, "%[a-z \\.] %[a-z \\.,]", &my_test.in)) !=
    > EOF)


    You might try using the format string "%99[^,],%99s".
    Another possibility is using function strtok.

    > {
    > fgetc(fp);
    > fscanf(fp, "%s", &my_test.out);
    > fgetc(fp);
    > printf("in %s out %s\n",my_test.in, my_test.out);
    > ++i;
    > }
    > fclose( fp);
    > return 0;
    > }


    If the file's data is formatted as you describe with each line
    containing the "in" data and the "out" data then you could use
    function fgets and function sscanf.

    #include <stdio.h>
    #include <string.h>

    /*
     * Loads "in,out" pairs from c:\Redirect.txt, one pair per line, into
     * my_test[] and then prints every stored pair.
     *
     * argc and argv are unused.
     *
     * Returns 0 on success. Returns 1 if the file cannot be opened, if a line
     * does not fit in the line buffer or does not parse as two fields, or if
     * the file holds more lines than my_test[] has elements.
     */
    int main(int argc, char* argv[])
    {
        struct test
        {
            char in[100];
            char out[100];
        } my_test[150];
        const int capacity = (int)(sizeof my_test / sizeof my_test[0]);
        /*
         * A well-formed line is at most 99 + 1 + 99 characters plus the
         * newline and terminator; 256 also leaves room for blanks after
         * the comma.
         */
        char buf[256];
        FILE *fp;
        int count = 0;
        int i;

        (void)argc;
        (void)argv;

        fp = fopen("c:\\Redirect.txt", "r");
        if (!fp)
        {
            printf("Can't open test file!\n");
            return 1;
        }

        while (fgets(buf, sizeof buf, fp) != NULL)
        {
            char *nl = strchr(buf, '\n');

            if (nl != NULL)
            {
                *nl = '\0';
            }
            else if (!feof(fp))
            {
                /* No newline and not at end of file: the line overflowed buf. */
                puts("File format error");
                fclose(fp);
                return 1;
            }
            /* Otherwise this is a final line without a trailing newline. */

            if (count >= capacity)
            {
                puts("Too many entries in file");
                fclose(fp);
                return 1;
            }

            /*
             * %99[^,] takes everything before the comma; %99s skips blanks
             * after the comma and takes the next whitespace-free token.
             */
            if (2 != sscanf(buf, "%99[^,],%99s",
                            my_test[count].in, my_test[count].out))
            {
                puts("File format error");
                fclose(fp);
                return 1;
            }
            count++;
        }
        fclose(fp);

        /* Testing */
        for (i = 0; i < count; i++)
        {
            printf("my_test[%d].in = %s\n"
                   "my_test[%d].out = %s\n\n",
                   i, my_test[i].in, i, my_test[i].out);
        }
        return 0;
    }

    --
    Al Bowers
    Tampa, Fl USA
    mailto: (remove the x)
    http://www.geocities.com/abowers822/
     
    Al Bowers, Sep 4, 2003
    #3
  4. Hilary Cotter

    j Guest

    "Hilary Cotter" <> wrote in message
    news:...
    > Thanks for all the help you gave me yesterday.
    >
    > here is another question.
    >
    > I have a comma delimited file called redirect.txt which looks like
    > this
    >
    > test, /test.htm
    > test 123,/test123.htm
    >
    > I am reading these values and processing them, but it seems like the
    > way I am doing it is not efficient. I was hoping for pointers on how
    > to make this more efficient.
    >
    > // testparse.cpp : Defines the entry point for the console
    > application.
    > //
    >
    > #include "stdafx.h"
    >
    > //#include <stdio.h>
    > //#include <stdlib.h>
    > //#include <string.h>
    > //#include <ctype.h>
    >
    > int main(int argc, char* argv[])
    > {
    >
    > FILE *fp;
    > int i;
    >
    > struct test
    > {
    > char in[100];
    > char out[100];
    > } my_test [150];
    >
    >
    > fp =fopen("c:\\Redirect.txt", "r");
    > if (!fp)
    > {
    > printf ("Can't open test file!\n");
    > return 1;
    > }
    > i=0;
    >
    > while ((fscanf(fp, "%[a-z \\.] %[a-z \\.,]", &my_test.in)) !=
    > EOF)
    > {
    > fgetc(fp);
    > fscanf(fp, "%s", &my_test.out);
    > fgetc(fp);
    > printf("in %s out %s\n",my_test.in, my_test.out);
    > ++i;
    > }
    > fclose( fp);
    > return 0;
    > }


    Why not instead read the entire file into memory (fseek & ftell to get the file
    size, then malloc that size+1, then fread) and then tokenize (strtok) using
    the delimiter "\n"? Then further split up each word (using the delimiter ','
    with strchr) based on the current string you are operating on that was
    returned from strtok.

    Although, I am not sure if this is the most efficient way.

    Oh and, you might want to use ``indent -kr -nut'' next time you post your
    code(if you have a copy of indent) :)
     
    j, Sep 4, 2003
    #4
  5. On Thu, 04 Sep 2003 12:17:14 -0400, Al Bowers
    <> wrote:

    > Hilary Cotter wrote:

    <snip>
    > > while ((fscanf(fp, "%[a-z \\.] %[a-z \\.,]", &my_test.in)) !=
    > > EOF)

    >

    The range format a-z (rather than abcdef etc.) is nonstandard.

    > You might try. using the format string "%99[^,],%99s".


    While the length limit is certainly an improvement and the simpler
    complement class probably is (though you might want [^,\n] in case the
    input contains any misformatted line(s)), both of these contain two
    conversions and one variable. You should either do one conversion
    here and the other in the body of the loop below, or both here, using
    %*c to skip instead of fgetc(), and none in the body.

    > Another possibility is using function strtok.
    >

    On lines read with fgets(), presumably, and copy the results with
    strcpy() plus overflow checks, or alternative like zero + strncat().

    > > {
    > > fgetc(fp);
    > > fscanf(fp, "%s", &my_test.out);


    This doesn't allow whitespace within the second value; did you want
    that?

    > > fgetc(fp);
    > > printf("in %s out %s\n",my_test.in, my_test.out);
    > > ++i;


    No protection against i overflowing the declared array size.

    > > }
    > > fclose( fp);
    > > return 0;
    > > }

    >
    > If the file's data is formatted as you describe with each line
    > containing the "in" data and the "out" data then you could use
    > function fgets and function sscanf.
    >

    Or strtok() and strcpy() or variant as above.

    <snip fgets then>
    > if(2 != sscanf(buf,"%99[^,],%99s",my_test[count].in,
    > my_test[count].out))


    This doesn't allow whitespace in second value, per above.

    <snip>

    - David.Thompson1 at worldnet.att.net
     
    Dave Thompson, Sep 15, 2003
    #5
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Luis Esteban Valencia

    Read Comma Delimited File

    Luis Esteban Valencia, Jul 27, 2005, in forum: ASP .Net
    Replies:
    4
    Views:
    2,834
    Paul Clement
    Jul 27, 2005
  2. John B. Lorenz

    Reading Comma Delimited Files

    John B. Lorenz, Jul 7, 2003, in forum: C Programming
    Replies:
    1
    Views:
    3,671
    Joe Wright
    Jul 8, 2003
  3. JustSomeGuy

    comma delimited text file.

    JustSomeGuy, Jul 28, 2006, in forum: C++
    Replies:
    4
    Views:
    982
    Default User
    Jul 29, 2006
  4. Elmo Watson

    Importing comma delimited file

    Elmo Watson, Aug 29, 2005, in forum: ASP General
    Replies:
    3
    Views:
    182
    Mark J. McGinty
    Sep 2, 2005
  5. Danny

    read from comma delimited file

    Danny, May 5, 2004, in forum: Perl Misc
    Replies:
    17
    Views:
    263
    David K. Wall
    May 11, 2004
Loading...

Share This Page