Text to string program

M

Malcolm

This is a program to convert a text file to a C string.
It is offered as a service to the comp.lang.c community.

Originally I thought it would be a five minute job to program. In fact there
are subtle problems, such as the fact that a text file may wrap lines.

It will be appearing on my website as soon as I get update access, assuming
no one finds anything wrong with it.

/*
texttostring - converts a text file to a C string
By Malcolm McLean
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#include "texttostring.h"

char *fnametoid(char *path);
char *loadfile(FILE *fp);

/*
print out a message showing how to use the program
*/
int usage(void)
{
printf("Program to take in a text file and spit it out as a C string\n");
printf("Usage: texttostring <infile.txt> [id]\n");
printf(" infile.txt - text file\n");
printf(" id - identifier of string, default is file name\n");
exit(EXIT_FAILURE);
}

/*
main function
argv[1] - the name of the file to turn into a C string
argv[2] - (optional) name of the identifier to use
*/
int main(int argc, char **argv)
{
FILE *fp;
char *id;
char *str;
char *cstr;
long i;

if(argc != 2 && argc != 3)
usage();

if(argc == 3)
id = argv[2];
else
id = fnametoid(argv[1]);
if(!id)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}

fp = fopen(argv[1], "r");
if(!fp)
{
fprintf(stderr, "Couldn't open file\n");
exit(EXIT_FAILURE);
}
str = loadfile(fp);
fclose(fp);

if(str)
{
cstr = texttostring(str);
if(!cstr)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}
printf("char *%s = ", id);
for(i=0;cstr;i++)
fputc(cstr, stdout);
printf(";\n");
}
else
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}

if(id != argv[2])
free(id);
free(str);
free(cstr);

return 0;
}

/*
convert a file name to a valid C identifier, by replacing
non-alphanmerics with dights.
*/
char *fnametoid(char *path)
{
char *answer;
char *ptr;

answer = malloc(strlen(path) + 2);
if(!answer)
return 0;

ptr = answer;
if(!isalpha(*path))
*ptr++ = 'a';
while(*path)
{
if(isalnum(*path))
*ptr++ = *path;
else
*ptr++ = '_';
path++;
}
*ptr = 0;

return answer;
}
/*
load a text file into memory

*/
char *loadfile(FILE *fp)
{
long len;
long i = 0;
char *answer;
char *temp;
int ch;

fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);

answer = malloc(len + 100 + len/10);
if(!answer)
return 0;
len = len + 100 + len/10;
while( (ch = fgetc(fp)) != EOF)
{
answer[i++] = (char) ch;
if(i < 0)
{
free(answer);
return 0;
}
if(i >= len - 1)
{
temp = realloc(answer, len + 100 + len/10);
if(!temp)
{
free(answer);
return 0;
}
answer = temp;
len = len + 100 + len/10;
}
}
answer = 0;

return answer;
}

/* header file */
#ifndef texttostring_h
#define texttostring_h

char *texttostring(const char *str);
int escaped(int ch);
char escapechar(int ch);

#endif

/*
text to string functions
*/
#include <string.h>
#include <stdlib.h>

#include "texttostring.h"

static size_t linesbiggerthan(const char *str, size_t maxlen);

/*
convert a string to a C language string;
Params:
str - the string to convert
Returns: C version of string, 0 on out of memory
Notes: newlines are represented by breaks in the string.
*/
char *texttostring(const char *str)
{
size_t len = 0;
size_t i;
size_t j = 0;
size_t linelen = 0;
char *answer;

for(i=0;str;i++)
{
if(str == '\n')
len += 5;
else if(escaped(str))
len+=2;
else
len += 1;
}
len += linesbiggerthan(str, 100) * 3;
len++;
len += 2;
answer = malloc(len);
if(!answer)
return 0;
answer[j++] = '"';
for(i=0;str;i++)
{
if(str == '\n' && str[i+1] != 0)
{
answer[j++] = '\\';
answer[j++] = 'n';
answer[j++] = '\"';
answer[j++] = '\n';
answer[j++] = '\"';
linelen = 0;
}
else if(escaped(str))
{
answer[j++] = '\\';
answer[j++] = escapechar(str);
linelen++;
}
else
{
answer[j++] = str;
linelen++;
}
if(linelen == 100 && str[i+1] != '\n')
{
answer[j++] = '\"';
answer[j++] = '\n';
answer[j++] = '\"';
linelen = 0;
}
}
answer[j++] = '\"';
answer[j++] = 0;

return answer;
}

/*
test if a character is escaped in C
Params: ch - the character to test
Returns: 1 if escaped in C strings, else 0
*/
int escaped(int ch)
{
char *escapes = "\a\b\f\n\r\t\v\?\'\"\\";

if(ch == 0)
return 1;
return strchr(escapes, ch) ? 1 : 0;
}

/*
get the escape character to represent ch
Params: ch - an escaped character
Returns: character that stands in for it in esacpe sequence,
0 if ch is not an escaped character
*/
char escapechar(int ch)
{
char *escapes = "\a\b\f\n\r\t\v\?\'\"\\";
char *characters = "abfnrtv?\'\"\\";
char *ptr;

if(ch == 0)
return '0';
ptr = strchr(escapes, ch);
if(ptr)
return characters[ptr - escapes];
else
return 0;
}

/*
get the number of lines bigger than a certain value
*/
static size_t linesbiggerthan(const char *str, size_t maxlen)
{
size_t len = 0;
size_t answer = 0;

while(*str)
{
if(*str == '\n')
len = 0;
else
{
len++;
if(len > maxlen)
{
len = 0;
answer++;
}
}
str++;
}

return answer;
}
 
S

spibou

Malcolm said:
This is a program to convert a text file to a C string.
It is offered as a service to the comp.lang.c community.

You need to work on the documentation. Reading the
description above or usage() I wasn't at all sure what the
programme is supposed to do. I take it that the idea is that
the output of the programme will be of the form
char name_of_str[] = "......" such that if in some C programme
you have something like fprintf(new_file,"%s",name_of_str) ;
then the file where new_file points to will be identical to the
file where name_of_str came from. Have I got it right ?

By the way , why does usage() return int and not void ?

Spiros Bousbouras
 
B

Barry Schwarz

This is a program to convert a text file to a C string.
It is offered as a service to the comp.lang.c community.

Originally I thought it would be a five minute job to program. In fact there
are subtle problems, such as the fact that a text file may wrap lines.

It will be appearing on my website as soon as I get update access, assuming
no one finds anything wrong with it.

/*
texttostring - converts a text file to a C string
By Malcolm McLean
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#include "texttostring.h"

char *fnametoid(char *path);
char *loadfile(FILE *fp);

/*
print out a message showing how to use the program
*/
int usage(void)

Why do you declare the function as returning an int when it doesn't.
{
printf("Program to take in a text file and spit it out as a C string\n");
printf("Usage: texttostring <infile.txt> [id]\n");
printf(" infile.txt - text file\n");
printf(" id - identifier of string, default is file name\n");
exit(EXIT_FAILURE);
}

/*
main function
argv[1] - the name of the file to turn into a C string
argv[2] - (optional) name of the identifier to use
*/
int main(int argc, char **argv)
{
FILE *fp;
char *id;
char *str;
char *cstr;
long i;

if(argc != 2 && argc != 3)
usage();

if(argc == 3)
id = argv[2];
else
id = fnametoid(argv[1]);
if(!id)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);

Consistent indenting is a virtue.
}

fp = fopen(argv[1], "r");
if(!fp)
{
fprintf(stderr, "Couldn't open file\n");

It would be nice to identify the file in the error message.
exit(EXIT_FAILURE);
}
str = loadfile(fp);
fclose(fp);

if(str)
{
cstr = texttostring(str);
if(!cstr)
{
fprintf(stderr, "Out of memory\n");

This is the same error message used if fnametoid fails. Different
messages would at least give the user a clue where the problem was.
exit(EXIT_FAILURE);
}
printf("char *%s = ", id);
for(i=0;cstr;i++)
fputc(cstr, stdout);


Is there some benefit to writing one character at a time rather than
the entire string?
printf(";\n");
}
else

Which if does this else relate to? This is why indenting can help.
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}

if(id != argv[2])
free(id);
free(str);
free(cstr);

return 0;
}

/*
convert a file name to a valid C identifier, by replacing
non-alphanmerics with dights.
*/
char *fnametoid(char *path)
{ snip
}
/*
load a text file into memory

*/
char *loadfile(FILE *fp)
{
long len;
long i = 0;
char *answer;
char *temp;
int ch;

fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);

answer = malloc(len + 100 + len/10);

For a text file, the value returned by ftell is not necessarily the
number of characters in the file. See n1124, paragraph 7.19.9.4-2.
if(!answer)
return 0;
len = len + 100 + len/10;
while( (ch = fgetc(fp)) != EOF)

Is there some benefit to reading each character individually rather
than the entire file at once with fread?
{
answer[i++] = (char) ch;
if(i < 0)

i starts at 0 and is incremented. When do you expect it to be
negative? (Hint: overflow is not a good answer.)
{
free(answer);
return 0;
}
if(i >= len - 1)
{
temp = realloc(answer, len + 100 + len/10);
if(!temp)
{
free(answer);
return 0;
}
answer = temp;
len = len + 100 + len/10;
}
}
answer = 0;

return answer;
}


snip header and utility functions


Remove del for email
 
P

pete

Malcolm said:
offered as a

That reminds me too much of:
"... offered as a
contribution to education
to be used only by the State of Indiana free of cost
by paying any royalties whatever on the same,
provided it is accepted and adopted by the
official action of the Legislature of 1897."
Originally I thought it would be a five minute job to program. In fact there
are subtle problems, such as the fact that a text file may wrap lines.

It will be appearing on my website as soon as I get update access, assuming
no one finds anything wrong with it.

/*
texttostring - converts a text file to a C string
By Malcolm McLean
*/

This is my version of that:

/* BEGIN type_1.c */

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>

#define ARGV_0 "type_1"

int line_to_string(FILE *fp, char **line, size_t *size);

int main(int argc, char *argv[])
{
int rc;
FILE *fd;
char *buff_ptr;
size_t buff_size;

buff_size = 0;
buff_ptr = NULL;
if (argc > 1) {
while (*++argv != NULL) {
fd = fopen(*argv, "r");
if (fd != NULL) {
while ((rc = line_to_string
(fd, &buff_ptr, &buff_size)) > 0)
{
switch (rc) {
case EOF:
if (buff_ptr != NULL
&& strlen(buff_ptr) > 0)
{
puts("rc equals EOF\n"
"The string in buff_ptr is:");
puts(buff_ptr);
}
break;
case 0:
puts("realloc returned a null pointer "
"value in line_to_string.");
if (buff_size > 1) {
puts("rc equals 0\n"
"The string in buff_ptr is:");
puts(buff_ptr);
}
break;
default:
puts(buff_ptr);
break;
}
}
fclose(fd);
} else {
fprintf(stderr,
"\nfopen() problem with \"%s\"\n", *argv);
break;
}
}
free(buff_ptr);
} else {
puts(
"Usage:\n>" ARGV_0
" <FILE_0.txt> <FILE_1.txt> <FILE_2.txt> ...\n"
);
}
return 0;
}

int line_to_string(FILE *fp, char **line, size_t *size)
{
int rc;
void *p;
size_t count;

count = 0;
while ((rc = getc(fp)) != EOF) {
++count;
if (count + 2 > *size) {
p = realloc(*line, count + 2);
if (p == NULL) {
if (*size > count) {
(*line)[count] = '\0';
(*line)[count - 1] = (char)rc;
} else {
ungetc(rc, fp);
}
count = 0;
break;
}
*line = p;
*size = count + 2;
}
if (rc == '\n') {
(*line)[count - 1] = '\0';
break;
}
(*line)[count - 1] = (char)rc;
}
if (rc != EOF) {
rc = count > INT_MAX ? INT_MAX : count;
} else {
if (*size > count) {
(*line)[count] = '\0';
}
}
return rc;
}

/* END type_1.c */
 
M

Malcolm

Barry Schwarz said:
This is a program to convert a text file to a C string.
It is offered as a service to the comp.lang.c community.

Originally I thought it would be a five minute job to program. In fact
there
are subtle problems, such as the fact that a text file may wrap lines.

It will be appearing on my website as soon as I get update access,
assuming
no one finds anything wrong with it.

/*
texttostring - converts a text file to a C string
By Malcolm McLean
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#include "texttostring.h"

char *fnametoid(char *path);
char *loadfile(FILE *fp);

/*
print out a message showing how to use the program
*/
int usage(void)

Why do you declare the function as returning an int when it doesn't.
Slip.
{
printf("Program to take in a text file and spit it out as a C
string\n");
printf("Usage: texttostring <infile.txt> [id]\n");
printf(" infile.txt - text file\n");
printf(" id - identifier of string, default is file name\n");
exit(EXIT_FAILURE);
}

/*
main function
argv[1] - the name of the file to turn into a C string
argv[2] - (optional) name of the identifier to use
*/
int main(int argc, char **argv)
{
FILE *fp;
char *id;
char *str;
char *cstr;
long i;

if(argc != 2 && argc != 3)
usage();

if(argc == 3)
id = argv[2];
else
id = fnametoid(argv[1]);
if(!id)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);

Consistent indenting is a virtue.
That is usenet's idea of filtering code.
}

fp = fopen(argv[1], "r");
if(!fp)
{
fprintf(stderr, "Couldn't open file\n");

It would be nice to identify the file in the error message.
Fair point.
This is the same error message used if fnametoid fails. Different
messages would at least give the user a clue where the problem was.
If the machine runs out of memory, that's all the user really wants to know.
A debugger would want to know which function caused the machine to run out,
but this program doesn't have any bugs :)-))
exit(EXIT_FAILURE);
}
printf("char *%s = ", id);
for(i=0;cstr;i++)
fputc(cstr, stdout);


Is there some benefit to writing one character at a time rather than
the entire string?

Yes. The string might be very long. Modern functions will probably tolerate
this, but you can't be sure. Better to be on the safe side.
printf(";\n");
}
else

Which if does this else relate to? This is why indenting can help.
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);
}

if(id != argv[2])
free(id);
free(str);
free(cstr);

return 0;
}

/*
convert a file name to a valid C identifier, by replacing
non-alphanmerics with dights.
*/
char *fnametoid(char *path)
{ snip
}
/*
load a text file into memory

*/
char *loadfile(FILE *fp)
{
long len;
long i = 0;
char *answer;
char *temp;
int ch;

fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);

answer = malloc(len + 100 + len/10);

For a text file, the value returned by ftell is not necessarily the
number of characters in the file. See n1124, paragraph 7.19.9.4-2.
Yes, I know. See below.
Is there some benefit to reading each character individually rather
than the entire file at once with fread?
The idea is that, should our return from ftell() be in error, we recover
from the problem.
{
answer[i++] = (char) ch;
if(i < 0)

i starts at 0 and is incremented. When do you expect it to be
negative? (Hint: overflow is not a good answer.)
Fair point. Should check i against INT_MAX.
{
free(answer);
return 0;
}
if(i >= len - 1)
{
temp = realloc(answer, len + 100 + len/10);
if(!temp)
{
free(answer);
return 0;
}
answer = temp;
len = len + 100 + len/10;
}
}
answer = 0;

return answer;
}


Surprising how much you can find in simple functions to slurp in files.
The moral is that when the language breaks to make such things difficult,
the attempts to code round the problem tend to introduce their own mistakes.
 
A

av

char *loadfile(FILE *fp)
{
long len;
long i = 0;
char *answer;
char *temp;
int ch;

fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);

answer = malloc(len + 100 + len/10);
if(!answer)
return 0;
len = len + 100 + len/10;
while( (ch = fgetc(fp)) != EOF)
{
answer[i++] = (char) ch;
if(i < 0)
{
free(answer);
return 0;
}
if(i >= len - 1)
{
temp = realloc(answer, len + 100 + len/10);

i think this is an error
it should be at last
if(i >= len - 1)
{ len=2*i;
temp = realloc(answer, len + 100 + len/10);

or

if(i >= len - 1)
{ len=len + 100 + len/10;
temp = realloc(answer, len + 100 + len/10);

if(!temp)
{
free(answer);
return 0;
}
answer = temp;
len = len + 100 + len/10;
}
}
answer = 0;

return answer;
}
 
R

Richard Bos

Malcolm said:
On Sun, 27 Aug 2006 20:15:52 +0100, "Malcolm"
if(argc == 3)
id = argv[2];
else
id = fnametoid(argv[1]);
if(!id)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);

Consistent indenting is a virtue.
That is usenet's idea of filtering code.

No, it isn't.

if(argc == 3)
id = argv[2];
else
id = fnametoid(argv[1]);
if(!id)
{
fprintf(stderr, "Out of memory\n");
exit(EXIT_FAILURE);

Problems with indentation on Usenet? I don't think so. Problems with
Outhouse Express and tabs, perhaps.

Richard
 
B

Bill Reid

Barry Schwarz said:
This is a program to convert a text file to a C string.
It is offered as a service to the comp.lang.c community.

Originally I thought it would be a five minute job to program. In fact there
are subtle problems, such as the fact that a text file may wrap lines.

It will be appearing on my website as soon as I get update access, assuming
no one finds anything wrong with it.

/*
texttostring - converts a text file to a C string
By Malcolm McLean
*/
....
for(i=0;cstr;i++)
fputc(cstr, stdout);


Is there some benefit to writing one character at a time rather than
the entire string?....
fseek(fp, 0, SEEK_END);
len = ftell(fp);
fseek(fp, 0, SEEK_SET);

answer = malloc(len + 100 + len/10);

For a text file, the value returned by ftell is not necessarily the
number of characters in the file. See n1124, paragraph 7.19.9.4-2.

....
if(!answer)
return 0;
len = len + 100 + len/10;
while( (ch = fgetc(fp)) != EOF)

Is there some benefit to reading each character individually rather
than the entire file at once with fread?

Hmmmm...isn't this all equivalent to SOMETHING like the following,
ASSUMING you were working on a UNIX system and NEVER opened
like gigabyte-sized files?

#include <stdio.h>
#include <stdlib.h>
#include <alloc.h>
#include <sys\stat.h>

FILE *file_stream;
char *file_buffer;
char file_path[64];

unsigned read_file_to_buffer(char *buf_ptr,FILE *fl_strm) {
unsigned long file_size;
struct stat file_info;

if(fstat(fileno(fl_strm),&file_info)==0)
file_size=file_info.st_size;
else return FALSE;

if((buf_ptr=malloc(file_size))==NULL) {
printf("\nNot enough memory to allocate file buffer");
return FALSE;
}

if(fread(buf_ptr,file_size,1,fl_strm)<file_size)
return FALSE;

else return TRUE;
}

int main(void) {

printf("\nEnter the complete path and filename: ");
gets(file_path);

if ((file_stream= fopen(file_path, "r"))==NULL) {
fprintf(stderr,"\nCannot open input file.");
return 1;
}

if(!read_file_to_buffer(file_buffer,file_stream)) {
printf("\nCouldn't read the file into the buffer...sorry...");
return 1;
}

else {
printf("\nHere's your file from the buffer: \n");
puts(file_buffer);
}

free(file_buffer);

return 0;
}

I mean, is there some fundamental reason why this wouldn't work
given the constraints listed above?
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,754
Messages
2,569,527
Members
44,998
Latest member
MarissaEub

Latest Threads

Top