T
Thomas Paul Diffenbach
I'm trying to write a space efficient string (nul terminated array
of char), storing the characters directly unless the number of
characters is too large to be so stored, and storing a pointer to
other storage otherwise.
Rather than lose space to padding by just using a sentinel bool to
indicate which member of the union has been written to, I want to
overload the last character of the directly stored string as the
sentinel.
I think what I'm doing is defined and legal, but I'd like you to
pick it apart in case I've missed something.
First, is it in fact undefined to write/read any possible padding
bits in a struct? If (as I suspect) it's not legal, am I in fact
avoiding doing so?
Basically, I have a union of char* and char[ n ], which is the
first member of a struct which has a char[ n ] as the second
member. In each case, n is sizeof( char* ).
Any string which, with nul terminator, is <= 2 * sizeof( char* ),
is stored directly. Since the nul terminator is the last character,
in this case the last character is 0 (or is forced to be).
Any string which, with nul terminator, is longer than 2 * sizeof(
char* ), is copied to malloc'd memory, and the union's first member
is pointed to it. Then the last character of the struct is set to 1.
Since the last character of the struct is NOT a part of the union,
it's legal to access it regardless of what member of the union has
been written to, to discover which member of the union was
written to and can legally be read.
In case there is padding at the end of the struct, we calculate the
size we can directly write by finding the offset just past the last
element of its last member, and not with the more obvious sizeof(
struct pstring ):
offsetof( struct pstring, m[ sizeof( char* ) ] )
(As m[ sizeof( char* ) ] is one past the end of the array, this
should be legal.)
Complete code can be found below.
Thanks,
Tom
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Overlay used as the first member of struct pstring: the same storage is
 * viewed either as a pointer (p) or as sizeof( char* ) raw characters (a).
 * The union itself does not record which member was written last.
 */
union pOrA {
char* p ; /* pointer view of the storage */
char a[ sizeof( char* ) ] ; /* character view, exactly as many bytes as a char* */
} ;
/*
 * Small-string container. write_pstring either copies the text into the
 * struct's own bytes, starting at its first byte, or stores a pointer to
 * malloc'd text in poa.p. The last element of m, m[ sizeof( char* ) - 1 ],
 * is the tag byte: write_pstring sets it nonzero when poa.p is in use and
 * zero otherwise, and read_pstring tests it to choose what to return.
 */
struct pstring {
/* NOTE: the name "pstring" says little about what this type is for. */
union pOrA poa ;
/* 1: The C standard does NOT guarantee the absence of padding here; it
      allows unnamed padding between members (only the start of a struct
      is guaranteed padding-free).
      NOTE(review): write_pstring copies through (char*)p as one contiguous
      run, so any padding here would end up holding string bytes. */
char m[ sizeof( char* ) ] ;
/* 2: Nor here: the standard allows unnamed padding at the end of a struct.
      write_pstring sizes the in-place area with offsetof rather than
      sizeof so trailing padding is never counted. */
} ;
/*
 * Store the first slen characters of src in *p as a NUL-terminated string.
 *
 * p    - destination; its previous contents are overwritten without being
 *        examined or freed, so it may be uninitialized.
 * src  - source characters; copied with strncpy, so copying stops early
 *        (and the remainder is zero-filled) if src holds a NUL before slen.
 * slen - number of characters to store, not counting the terminator.
 *
 * When slen plus the terminator fits in the bytes of *p (everything up to
 * and including the last element of p->m), the text is written directly
 * into the struct and the last element of p->m ends up 0. Otherwise the
 * text is copied to malloc'd storage owned by the caller, p->poa.p points
 * at it, and the last element of p->m is set to 1.
 *
 * If the allocation fails (or slen + 1 would overflow), p->poa.p is set to
 * NULL and the tag is still set to 1, so read_pstring returns NULL.
 */
void write_pstring( struct pstring* p, const char* src, size_t slen ) {
    /* Offset one past the last element of m. offsetof is used instead of
       sizeof( struct pstring ) so that trailing padding is never counted
       as writable string space. */
    const size_t inline_cap = offsetof( struct pstring, m[ sizeof( char* ) ] ) ;
    const char on_heap = ( slen >= inline_cap ) ;
    char* d ;

    if( on_heap ) {
        /* slen + 1 wraps to 0 when slen is the largest size_t value;
           treat that like an allocation failure. */
        d = ( slen < (size_t)-1 ) ? malloc( slen + 1 ) : NULL ;
        p->poa.p = d ;
    } else {
        d = (char*)p ;
    }

    if( d != NULL ) {
        strncpy( d, src, slen ) ;
        d[ slen ] = 0 ;
    }

    /* Written last: for in-place text this byte may have just received a
       string character or the terminator, and it must end up as the tag. */
    p->m[ sizeof( char* ) - 1 ] = on_heap ;
}
/*
 * Return the text held by *p.
 *
 * The last element of p->m is the tag byte: nonzero means the text is
 * reached through the pointer stored in p->poa.p, zero means the text
 * starts at the first byte of the struct (p->poa.a).
 */
const char* read_pstring( struct pstring* p ) {
    const size_t tag_index = sizeof( char* ) - 1 ;

    return p->m[ tag_index ] ? p->poa.p : p->poa.a ;
}
/*
 * Demo driver: for every length i from 0 up to the shorter of the alphabet
 * length and the number of array slots, store the first i letters of the
 * alphabet in ps[ i ] and print the string read back.
 */
int main( void ) {
    struct pstring ps[ 16 ] ;
    const char* src = "abcdefghijklmnopqrstuvwxyz" ;
    const size_t srclen = strlen( src ) ;
    const size_t slots = sizeof( ps ) / sizeof( ps[ 0 ] ) ;
    size_t i ;

    /* size_t index: avoids comparing a signed int against the unsigned
       sizeof quotient. */
    for( i = 0 ; i <= srclen && i < slots ; ++i ) {
        const char* text ;

        write_pstring( &ps[ i ], src, i ) ;
        text = read_pstring( &ps[ i ] ) ;
        /* Never hand printf a null pointer for %s. */
        printf( "ps[ %i ]: |%s|\n", (int)i, text ? text : "(null)" ) ;

        /* A nonzero tag byte means poa.p holds malloc'd storage; release
           it now that the string has been printed. */
        if( ps[ i ].m[ sizeof( char* ) - 1 ] ) {
            free( ps[ i ].poa.p ) ;
        }
    }
    return 0 ;
}
of char), storing the characters directly unless the number of
characters is too large to be so stored, and storing a pointer to
other storage otherwise.
Rather than lose space to padding by just using a sentinel bool to
indicate which member of the union has been written to, I want to
overload the last character of the directly stored string as the
sentinel.
I think what I'm doing is defined and legal, but I'd like you to
pick it apart in case I've missed something.
First, is it in fact undefined to write/read any possible padding
bits in a struct? If (as I suspect) it's not legal, am I in fact
avoiding doing so?
Basically, I have a union of char* and char[ n ], which is the
first member of a struct which has a char[ n ] as the second
member. In each case, n is sizeof( char* ).
Any string which, with nul terminator, is <= 2 * sizeof( char* ),
is stored directly. Since the nul terminator is the last character,
in this case the last character is 0 (or is forced to be).
Any string which, with nul terminator, is longer than 2 * sizeof(
char* ), is copied to malloc'd memory, and the union's first member
is pointed to it. Then the last character of the struct is set to 1.
Since the last character of the struct is NOT a part of the union,
it's legal to access it regardless of what member of the union has
been written to, to discover which member of the union was
written to and can legally be read.
In case there is padding at the end of the struct, we calculate the
size we can directly write by finding the offset just past the last
element of its last member, and not with the more obvious sizeof(
struct pstring ):
offsetof( struct pstring, m[ sizeof( char* ) ] )
(As m[ sizeof( char* ) ] is one past the end of the array, this
should be legal.)
Complete code can be found below.
Thanks,
Tom
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
/*
 * Overlay used as the first member of struct pstring: the same storage is
 * viewed either as a pointer (p) or as sizeof( char* ) raw characters (a).
 * The union itself does not record which member was written last.
 */
union pOrA {
char* p ; /* pointer view of the storage */
char a[ sizeof( char* ) ] ; /* character view, exactly as many bytes as a char* */
} ;
/*
 * Small-string container. write_pstring either copies the text into the
 * struct's own bytes, starting at its first byte, or stores a pointer to
 * malloc'd text in poa.p. The last element of m, m[ sizeof( char* ) - 1 ],
 * is the tag byte: write_pstring sets it nonzero when poa.p is in use and
 * zero otherwise, and read_pstring tests it to choose what to return.
 */
struct pstring {
/* NOTE: the name "pstring" says little about what this type is for. */
union pOrA poa ;
/* 1: The C standard does NOT guarantee the absence of padding here; it
      allows unnamed padding between members (only the start of a struct
      is guaranteed padding-free).
      NOTE(review): write_pstring copies through (char*)p as one contiguous
      run, so any padding here would end up holding string bytes. */
char m[ sizeof( char* ) ] ;
/* 2: Nor here: the standard allows unnamed padding at the end of a struct.
      write_pstring sizes the in-place area with offsetof rather than
      sizeof so trailing padding is never counted. */
} ;
/*
 * Store the first slen characters of src in *p as a NUL-terminated string.
 *
 * p    - destination; its previous contents are overwritten without being
 *        examined or freed, so it may be uninitialized.
 * src  - source characters; copied with strncpy, so copying stops early
 *        (and the remainder is zero-filled) if src holds a NUL before slen.
 * slen - number of characters to store, not counting the terminator.
 *
 * When slen plus the terminator fits in the bytes of *p (everything up to
 * and including the last element of p->m), the text is written directly
 * into the struct and the last element of p->m ends up 0. Otherwise the
 * text is copied to malloc'd storage owned by the caller, p->poa.p points
 * at it, and the last element of p->m is set to 1.
 *
 * If the allocation fails (or slen + 1 would overflow), p->poa.p is set to
 * NULL and the tag is still set to 1, so read_pstring returns NULL.
 */
void write_pstring( struct pstring* p, const char* src, size_t slen ) {
    /* Offset one past the last element of m. offsetof is used instead of
       sizeof( struct pstring ) so that trailing padding is never counted
       as writable string space. */
    const size_t inline_cap = offsetof( struct pstring, m[ sizeof( char* ) ] ) ;
    const char on_heap = ( slen >= inline_cap ) ;
    char* d ;

    if( on_heap ) {
        /* slen + 1 wraps to 0 when slen is the largest size_t value;
           treat that like an allocation failure. */
        d = ( slen < (size_t)-1 ) ? malloc( slen + 1 ) : NULL ;
        p->poa.p = d ;
    } else {
        d = (char*)p ;
    }

    if( d != NULL ) {
        strncpy( d, src, slen ) ;
        d[ slen ] = 0 ;
    }

    /* Written last: for in-place text this byte may have just received a
       string character or the terminator, and it must end up as the tag. */
    p->m[ sizeof( char* ) - 1 ] = on_heap ;
}
/*
 * Return the text held by *p.
 *
 * The last element of p->m is the tag byte: nonzero means the text is
 * reached through the pointer stored in p->poa.p, zero means the text
 * starts at the first byte of the struct (p->poa.a).
 */
const char* read_pstring( struct pstring* p ) {
    const size_t tag_index = sizeof( char* ) - 1 ;

    return p->m[ tag_index ] ? p->poa.p : p->poa.a ;
}
/*
 * Demo driver: for every length i from 0 up to the shorter of the alphabet
 * length and the number of array slots, store the first i letters of the
 * alphabet in ps[ i ] and print the string read back.
 */
int main( void ) {
    struct pstring ps[ 16 ] ;
    const char* src = "abcdefghijklmnopqrstuvwxyz" ;
    const size_t srclen = strlen( src ) ;
    const size_t slots = sizeof( ps ) / sizeof( ps[ 0 ] ) ;
    size_t i ;

    /* size_t index: avoids comparing a signed int against the unsigned
       sizeof quotient. */
    for( i = 0 ; i <= srclen && i < slots ; ++i ) {
        const char* text ;

        write_pstring( &ps[ i ], src, i ) ;
        text = read_pstring( &ps[ i ] ) ;
        /* Never hand printf a null pointer for %s. */
        printf( "ps[ %i ]: |%s|\n", (int)i, text ? text : "(null)" ) ;

        /* A nonzero tag byte means poa.p holds malloc'd storage; release
           it now that the string has been printed. */
        if( ps[ i ].m[ sizeof( char* ) - 1 ] ) {
            free( ps[ i ].poa.p ) ;
        }
    }
    return 0 ;
}