Richard said:
Which way would you guys recommened to best parse a multiline file
which contains two fields seperated by a tab. In this case its the
linux/proc/filesystems file a sample of which I have included below:
nodev usbfs
ext3
nodev fuse
vfat
ntfs
nodev binfmt_misc
udf
iso9660
The first field can be "empty" and concist of only a single tab
character. The seperator is a tab.
Is sscanf best suited to this? Or use strtok/strtok_r?
The field I am really interested in is the second one : any hints
& tips appreciated as to do this in the most efficient manor.
Use toksplit. Call with tokchar set to '\t'. Std C code follows:
/* ------- file toksplit.h ----------*/
#ifndef H_toksplit_h
# define H_toksplit_h
# ifdef __cplusplus
extern "C" {
# endif
#include <stddef.h>
/* copy over the next token from an input string, after
skipping leading blanks (or other whitespace?). The
token is terminated by the first appearance of tokchar,
or by the end of the source string.
The caller must supply sufficient space in token to
receive any token, Otherwise tokens will be truncated.
Returns: a pointer past the terminating tokchar.
This will happily return an infinity of empty tokens if
called with src pointing to the end of a string. Tokens
will never include a copy of tokchar.
released to Public Domain, by C.B. Falconer.
Published 2006-02-20. Attribution appreciated.
*/
const char *toksplit(const char *src, /* Source of tokens */
char tokchar, /* token delimiting char */
char *token, /* receiver of parsed token */
size_t lgh); /* length token can receive */
/* not including final '\0' */
# ifdef __cplusplus
}
# endif
#endif
/* ------- end file toksplit.h ----------*/
/* ------- file toksplit.c ----------*/
#include "toksplit.h"
/* copy over the next token from an input string, after
skipping leading blanks (or other whitespace?). The
token is terminated by the first appearance of tokchar,
or by the end of the source string.
The caller must supply sufficient space in token to
receive any token, Otherwise tokens will be truncated.
Returns: a pointer past the terminating tokchar.
This will happily return an infinity of empty tokens if
called with src pointing to the end of a string. Tokens
will never include a copy of tokchar.
A better name would be "strtkn", except that is reserved
for the system namespace. Change to that at your risk.
released to Public Domain, by C.B. Falconer.
Published 2006-02-20. Attribution appreciated.
Revised 2006-06-13
*/
const char *toksplit(const char *src, /* Source of tokens */
char tokchar, /* token delimiting char */
char *token, /* receiver of parsed token */
size_t lgh) /* length token can receive */
/* not including final '\0' */
{
if (src) {
while (' ' == *src) src++;
while (*src && (tokchar != *src)) {
if (lgh) {
*token++ = *src;
--lgh;
}
src++;
}
if (*src && (tokchar == *src)) src++;
}
*token = '\0';
return src;
} /* toksplit */
#ifdef TESTING
#include <stdio.h>
#define ABRsize 6 /* length of acceptable token abbreviations */
/* ---------------- */
static void showtoken(int i, char *tok)
{
putchar(i + '1'); putchar(':');
puts(tok);
} /* showtoken */
/* ---------------- */
int main(void)
{
char teststring[] = "This is a test, ,, abbrev, more";
const char *t, *s = teststring;
int i;
char token[ABRsize + 1];
puts(teststring);
t = s;
for (i = 0; i < 4; i++) {
t = toksplit(t, ',', token, ABRsize);
showtoken(i, token);
}
puts("\nHow to detect 'no more tokens' while truncating");
t = s; i = 0;
while (*t) {
t = toksplit(t, ',', token, 3);
showtoken(i, token);
i++;
}
puts("\nUsing blanks as token delimiters");
t = s; i = 0;
while (*t) {
t = toksplit(t, ' ', token, ABRsize);
showtoken(i, token);
i++;
}
return 0;
} /* main */
#endif
/* ------- end file toksplit.c ----------*/