Would someone tell me if I'm on the wrong track?

L

luser- -droog

I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk

So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?

I'd appreciate any comments the experts here could offer.

One apology. I'm trying to use mmap if available or malloc
as a fallback, but ifdefs, it seems, can't be made pretty.
Or, at least I have yet to discover how, unless it's just to
"put it in a file you don't need to look at". :)

569(1)01:27 AM:proto 0> make v
cc -g -Wall v.c -o v
570(1)01:27 AM:proto 0> v
put seven, got a 7
571(1)01:27 AM:proto 0> cat v.c
#define MMAP
#define _GNU_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifdef MMAP
#include <sys/mman.h>
#endif /* otherwise, use malloc/realloc/free */

void error(char *msg) {
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}

unsigned pgsz /*= getpagesize()*/;

typedef struct {
unsigned char *base;
unsigned used;
unsigned max;
} mfile;

/* initialize the memory file */
void initmem(mfile *mem) {
#ifdef MMAP
mem->base = mmap(NULL, pgsz, PROT_READ|PROT_WRITE, MAP_SHARED
|MAP_ANONYMOUS,-1,0 );
if (mem->base == MAP_FAILED)
#else
mem->base = malloc(pgsz);
if (mem->base == NULL)
#endif
error("unable to initialize memory file");
mem->used = 0;
mem->max = pgsz;
}

/* destroy memory file */
void exitmem(mfile *mem) {
#ifdef MMAP
munmap(mem->base, mem->max);
#else
free(mem->base);
#endif
mem->base = NULL;
mem->used = 0;
mem->max = 0;
}

/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
unsigned char *tmp;
if (sz < pgsz) sz = pgsz;
else sz = (sz/pgsz + 1) * pgsz;
sz += mem->max;
#ifdef MMAP
tmp = mremap(mem->base, mem->max, sz, MREMAP_MAYMOVE);
if (tmp == MAP_FAILED)
#else
tmp = realloc(mem->base, sz);
if (!tmp)
#endif
error("unable to grow memory");
mem->base = tmp;
mem->max = sz;
}

/* allocate memory, returns offset in memory file */
unsigned mfalloc(mfile *mem, unsigned sz) {
unsigned adr = mem->used;
if (sz + mem->used > mem->max) growmem(mem,sz);
mem->used += sz;
return adr;
}


#define TABSZ 1000
typedef struct {
unsigned nexttab;
unsigned nextent;
struct {
unsigned adr;
} tab[TABSZ];
} mtab;

/* allocate and initialize a new table */
unsigned initmtab(mfile *mem) {
unsigned adr;
adr = mfalloc(mem, sizeof(mtab)); /*create mtab at address 0*/
memset(mem->base + adr, 0, sizeof(mtab));
return adr;
}

/* allocate memory, returns table index */
unsigned mtalloc(mfile *mem, unsigned mtabloc, unsigned sz) {
mtab *tab = (void *)(mem->base + mtabloc);
if (tab->nextent >= TABSZ)
return mtalloc(mem, tab->nexttab, sz);
else {
unsigned ent = tab->nextent;
tab->nextent++;
tab->tab[ent].adr = mfalloc(mem, sz);
if (tab->nextent == TABSZ) {
tab->nexttab = initmtab(mem);
}
return ent;
}
}

/* fetch a value from a composite object */
void get(mfile *mem, unsigned ent, unsigned offset, unsigned sz, void
*dest) {
mtab *tab;
unsigned mtabloc = 0;
tab = (void *)(mem->base + mtabloc);
while (ent >= TABSZ) {
tab = (void *)(mem->base + mtabloc);
mtabloc = tab->nexttab;
ent -= TABSZ;
}
memcpy(dest, mem->base + tab->tab[ent].adr + offset*sz, sz);
}

/* put a value into a composite object */
void put(mfile *mem, unsigned ent, unsigned offset, unsigned sz, void
*src) {
mtab *tab;
unsigned mtabloc = 0;
tab = (void *)(mem->base + mtabloc);
while (ent >= TABSZ) {
tab = (void *)(mem->base + mtabloc);
mtabloc = tab->nexttab;
ent -= TABSZ;
}
memcpy(mem->base + tab->tab[ent].adr + offset*sz, src, sz);
}

mfile mem;

/* initialize everything */
void init(void) {
pgsz = getpagesize();
initmem(&mem);
(void)initmtab(&mem); /* init table zero */
}

/* destroy everything */
void xit(void) {
exitmem(&mem);
}


int main() {
init();
unsigned adr;
int seven = 7;
int ret;
adr = mtalloc(&mem, 0, sizeof seven);
put(&mem, adr, 0, sizeof seven, &seven);
get(&mem, adr, 0, sizeof seven, &ret);
printf("put seven, got a %d\n", ret);

xit();
return 0;
}
 
L

luser- -droog

I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk

So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?
[...]

A little more testing always helps.
It still looks good. I feel a little better.

581(2)02:02 AM:proto 0> tail -20 v.c
printf("put seven, got a %d\n", ret);

unsigned adr2;
adr2 = mtalloc(&mem, 0, 8*sizeof seven);
put(&mem, adr2, 6, sizeof seven, &seven);
get(&mem, adr2, 6, sizeof seven, &ret);
printf("put seven in slot 7, got a %d\n", ret);

unsigned adr3;
char str[] = "beads in buddha's necklace";
char sret[sizeof str];
adr3 = mtalloc(&mem, 0, strlen(str)+1);
put(&mem, adr3, 0, sizeof str, str);
get(&mem, adr3, 0, sizeof str, sret);
printf("stored and retrieved %s\n", sret);

xit();
return 0;
}

582(2)02:02 AM:proto 0> v
put seven, got a 7
put seven in slot 7, got a 7
stored and retrieved beads in buddha's necklace
583(2)02:02 AM:proto 0>
 
L

luser- -droog

"luser- -droog" <[email protected]> ha scritto nel messaggio














what about if this mult above overflow?

That shouldn't be a problem for my application as sizes
will be limited to 16bit quantities.
what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0)  return  -1;"
so that each function can fail.
never exist one function can not fail

Yeah, you're right. There should be a lot more checking
throughout.
 
L

luser- -droog

"luser- -droog" <[email protected]> ha scritto nel messaggio

I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk
So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?
I'd appreciate any comments the experts here could offer. [...]
/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
   unsigned char *tmp;
   if (sz < pgsz) sz = pgsz;
   else sz = (sz/pgsz + 1) * pgsz;

what about if this mult above overflow?
   sz += mem->max;

what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0)  return  -1;"
so that each function can fail.
never exist one function can not fail

unsigned sz;
(int) sz < 0

Does this just check the high bit (assuming 2's complement)?
 
S

Seebs

unsigned sz;
(int) sz < 0

Does this just check the high bit (assuming 2's complement)?

Not necessarily.

If you're comfortable assuming 2s complement, I bet you can do better with:
sz > INT_MAX

-s
 
S

Seebs

I don't know,
but I don't see the relevance of ((int) sz < 0).

The relevance was in that question there.

On a typical 2s complement system, it is quite likely that if you convert
a value greater than INT_MAX but no larger than UINT_MAX to a signed integer
type, you will end up with a negative value. Thus,
sz > INT_MAX
and
(int) sz < 0
are both fancy ways of saying something much like:

(sz & ((UNIT_MAX) & ~(UINT_MAX >> 1))) != 0

(unless my lack of morning coffee made me get that wrong...)

-s
 
L

luser- -droog

luser- -droog said:
"luser- -droog" <[email protected]> ha scritto nel messaggio
I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk
So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?
I'd appreciate any comments the experts here could offer.
[...]
/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
   unsigned char *tmp;
   if (sz < pgsz) sz = pgsz;
   else sz = (sz/pgsz + 1) * pgsz;
what about if this mult above overflow?
   sz += mem->max;
what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0)  return  -1;"
so that each function can fail.
never exist one function can not fail
unsigned sz;
(int) sz < 0
Does this just check the high bit (assuming 2's complement)?

I don't know,
but I don't see the relevance of ((int) sz < 0).

If you want to check for wrap around after (sz += mem->max), then

    if (sz - mem->max > sz) ThenItWrapped;

I think that's closer to what io_x was suggesting.
For my current purposes sz will be no more than 8*USHORTMAX (if there
is such a thing). But mem-max itself could conceivably get pretty big.
(~2 gigs, right? assuming 32-bit words).

If I approach this limit, I think I can squeeze a little more life out
of this if I make mem->max and mem->used 64bits (since they cover the
whole space), but change the adresses to be relative to the table
location (mtabloc, above) instead of mem->base.

I'm eager to start building on top of this module, but I want to get
as close to "right" as is practical.
 
S

Seebs

But
why would you care about
whether or not an unsigned object
held a value which was greater than INT_MAX?

Reasons might include:
* Want to check whether high bit is set
* Want to be sure it's well-defined to convert it to int

-s
 
P

Peter Nilsson

unsigned sz;
(int) sz < 0

Conversion of an unsigned int value not in the range of int to
an int is unspecified in C90 and (to all intents and purposes)
undefined behaviour in C99.

Also note that UINT_MAX may be more than 2 * INT_MAX + 1.

Addition of two unsigned ints a and b will 'wrap' if and only
if (a > -1u - b) [i.e. mathematically a + b > UINT_MAX.]
 
L

luser- -droog

But
why would you care about
whether or not an unsigned object
held a value which was greater than INT_MAX?

Well, I don't, really. But the only response my code got
was a cryptic message from io_x containing this bit
of c as an example. So it caught my eye and I wondered:
what the heck is this trying to do?

I think the gist of what io_x was trying to say is:
don't trust your input values in a function;
check that they are all in a meaningful range.

I think he recommended this code as a quick-and-dirty
failsafe check for the validity of the sz parameter.
Hitting the high-bit would mean attempting to allocate
more than 1 gigabyte in my delicate little suite of
functions.

But he may have picked that parameter arbitrarily.
I'd be much more worried about overflowing the mem->max
or mem->used values.
 
B

Ben Bacarisse

Seebs said:
Not necessarily.

If you're comfortable assuming 2s complement, I bet you can do better with:
sz > INT_MAX

The assumption is not 2's complement as far as I can see. Your test
works in all of C's three permitted signed integer representations (how
can it not? -- it does not involve any signed integers). It fails if
unsigned int has more then one more value bit that signed int.
 
B

Ben Bacarisse

pete said:
Also if
unsigned int has less than one more value bit than signed int.

Yes. It would have been simpler just to say that it fails unless
unsigned int has exactly one more value bit than signed int.
 
T

Tim Rentsch

Peter Nilsson said:
Conversion of an unsigned int value not in the range of int to
an int is unspecified in C90 and (to all intents and purposes)
undefined behaviour in C99.

Correction: in both cases the behavior is implementation defined.
In C90 that is limited to producing an implementation-defined
value. C99 expands that just slightly to allow raising an
implementation-defined signal. This is different from UB both in
principle and in practice: in principle, because whatever occurs
must be documented; in practice, because the range of behaviors
that actually occur is (and always will be) much more well-behaved
than arbitrary undefined behavior. Invoking such behavior may not
be generally desirable, but it is not undefined behavior, not even
just to all intents and purposes.
 
P

Peter Nilsson

Tim Rentsch said:
Correction:  in both cases the behavior is implementation defined.

So, unspecified but requiring the implementation to document its
choice.
In C90 that is limited to producing an implementation-defined
value.  C99 expands that just slightly to allow raising an
implementation-defined signal.  This is different from UB both
in principle
Perhaps.

and in practice:

Well, I suppose it's possible to do...

void bye(int sig) { _Exit(0); }

int main(void)
{
for (int sig = 0; sig < INT_MAX; sig++)
signal(sig, bye);
signal(INT_MAX, bye);
...safe and sound...
}

But that's not exactly practical on a 64-bit int machine.
 
T

Tim Rentsch

Peter Nilsson said:
So, unspecified but requiring the implementation to document its
choice.

Yes, that follows from the definition of implementation-defined
behavior. It's still important to make the distinction.
Well, I suppose it's possible to do...

void bye(int sig) { _Exit(0); }

int main(void)
{
for (int sig = 0; sig < INT_MAX; sig++)
signal(sig, bye);
signal(INT_MAX, bye);
...safe and sound...
}

But that's not exactly practical on a 64-bit int machine.

Oh come on, I know you're smarter than that. Because
the signal in question is implementation-defined, it
must be documented, and an appropriate signal call
can be written. More to the point, such calls are
necessary only if the program is going to run on
implementations that do in fact raise a signal in
these cases, and almost none do; simply checking
the documentation for the implementations that are
to be supported will reveal which ones need to set
a signal handler, and the most likely case is that
none will.

You seem to be assuming that it only makes sense to write
code that is guaranteed to be 100% portable for any
imaginable conforming implementation. Obviously that isn't
realistically practical for a program of any significant
size -- there are bound to be some rough edges here and
there. As non-portable transgressions go, converting an
out-of-range value to a signed type is among the least
likely to be problematic.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,764
Messages
2,569,567
Members
45,041
Latest member
RomeoFarnh

Latest Threads

Top