Would someone tell me if I'm on the wrong track?

luser- -droog · May 10, 2011

I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk

So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?

I'd appreciate any comments the experts here could offer.

One apology. I'm trying to use mmap if available or malloc
as a fallback, but ifdefs, it seems, can't be made pretty.
Or, at least I have yet to discover how, unless it's just to
"put it in a file you don't need to look at".

569(1)01:27 AM

roto 0> make v
cc -g -Wall v.c -o v
570(1)01:27 AM

roto 0> v
put seven, got a 7
571(1)01:27 AM

roto 0> cat v.c
#define MMAP
#define _GNU_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifdef MMAP
#include <sys/mman.h>
#endif /* otherwise, use malloc/realloc/free */

void error(char *msg) {
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}

unsigned pgsz /*= getpagesize()*/;

typedef struct {
unsigned char *base;
unsigned used;
unsigned max;
} mfile;

/* initialize the memory file */
void initmem(mfile *mem) {
#ifdef MMAP
mem->base = mmap(NULL, pgsz, PROT_READ|PROT_WRITE, MAP_SHARED
|MAP_ANONYMOUS,-1,0 );
if (mem->base == MAP_FAILED)
#else
mem->base = malloc(pgsz);
if (mem->base == NULL)
#endif
error("unable to initialize memory file");
mem->used = 0;
mem->max = pgsz;
}

/* destroy memory file */
void exitmem(mfile *mem) {
#ifdef MMAP
munmap(mem->base, mem->max);
#else
free(mem->base);
#endif
mem->base = NULL;
mem->used = 0;
mem->max = 0;
}

/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
unsigned char *tmp;
if (sz < pgsz) sz = pgsz;
else sz = (sz/pgsz + 1) * pgsz;
sz += mem->max;
#ifdef MMAP
tmp = mremap(mem->base, mem->max, sz, MREMAP_MAYMOVE);
if (tmp == MAP_FAILED)
#else
tmp = realloc(mem->base, sz);
if (!tmp)
#endif
error("unable to grow memory");
mem->base = tmp;
mem->max = sz;
}

/* allocate memory, returns offset in memory file */
unsigned mfalloc(mfile *mem, unsigned sz) {
unsigned adr = mem->used;
if (sz + mem->used > mem->max) growmem(mem,sz);
mem->used += sz;
return adr;
}

#define TABSZ 1000
typedef struct {
unsigned nexttab;
unsigned nextent;
struct {
unsigned adr;
} tab[TABSZ];
} mtab;

/* allocate and initialize a new table */
unsigned initmtab(mfile *mem) {
unsigned adr;
adr = mfalloc(mem, sizeof(mtab)); /*create mtab at address 0*/
memset(mem->base + adr, 0, sizeof(mtab));
return adr;
}

/* allocate memory, returns table index */
unsigned mtalloc(mfile *mem, unsigned mtabloc, unsigned sz) {
mtab *tab = (void *)(mem->base + mtabloc);
if (tab->nextent >= TABSZ)
return mtalloc(mem, tab->nexttab, sz);
else {
unsigned ent = tab->nextent;
tab->nextent++;
tab->tab[ent].adr = mfalloc(mem, sz);
if (tab->nextent == TABSZ) {
tab->nexttab = initmtab(mem);
}
return ent;
}
}

/* fetch a value from a composite object */
void get(mfile *mem, unsigned ent, unsigned offset, unsigned sz, void
*dest) {
mtab *tab;
unsigned mtabloc = 0;
tab = (void *)(mem->base + mtabloc);
while (ent >= TABSZ) {
tab = (void *)(mem->base + mtabloc);
mtabloc = tab->nexttab;
ent -= TABSZ;
}
memcpy(dest, mem->base + tab->tab[ent].adr + offset*sz, sz);
}

/* put a value into a composite object */
void put(mfile *mem, unsigned ent, unsigned offset, unsigned sz, void
*src) {
mtab *tab;
unsigned mtabloc = 0;
tab = (void *)(mem->base + mtabloc);
while (ent >= TABSZ) {
tab = (void *)(mem->base + mtabloc);
mtabloc = tab->nexttab;
ent -= TABSZ;
}
memcpy(mem->base + tab->tab[ent].adr + offset*sz, src, sz);
}

mfile mem;

/* initialize everything */
void init(void) {
pgsz = getpagesize();
initmem(&mem);
(void)initmtab(&mem); /* init table zero */
}

/* destroy everything */
void xit(void) {
exitmem(&mem);
}

int main() {
init();
unsigned adr;
int seven = 7;
int ret;
adr = mtalloc(&mem, 0, sizeof seven);
put(&mem, adr, 0, sizeof seven, &seven);
get(&mem, adr, 0, sizeof seven, &ret);
printf("put seven, got a %d\n", ret);

xit();
return 0;
}

luser- -droog · May 10, 2011

I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk

So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?

[...]

A little more testing always helps.
It still looks good. I feel a little better.

581(2)02:02 AM

roto 0> tail -20 v.c
printf("put seven, got a %d\n", ret);

unsigned adr2;
adr2 = mtalloc(&mem, 0, 8*sizeof seven);
put(&mem, adr2, 6, sizeof seven, &seven);
get(&mem, adr2, 6, sizeof seven, &ret);
printf("put seven in slot 7, got a %d\n", ret);

unsigned adr3;
char str[] = "beads in buddha's necklace";
char sret[sizeof str];
adr3 = mtalloc(&mem, 0, strlen(str)+1);
put(&mem, adr3, 0, sizeof str, str);
get(&mem, adr3, 0, sizeof str, sret);
printf("stored and retrieved %s\n", sret);

xit();
return 0;
}

582(2)02:02 AM

roto 0> v
put seven, got a 7
put seven in slot 7, got a 7
stored and retrieved beads in buddha's necklace
583(2)02:02 AM

roto 0>

luser- -droog · May 11, 2011

"luser- -droog" <[email protected]> ha scritto nel messaggio

what about if this mult above overflow?

That shouldn't be a problem for my application as sizes
will be limited to 16bit quantities.

what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0) return -1;"
so that each function can fail.
never exist one function can not fail

Yeah, you're right. There should be a lot more checking
throughout.

luser- -droog · May 12, 2011

"luser- -droog" <[email protected]> ha scritto nel messaggio

I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk

Click to expand...

So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?

Click to expand...

I'd appreciate any comments the experts here could offer. [...]
/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
unsigned char *tmp;
if (sz < pgsz) sz = pgsz;
else sz = (sz/pgsz + 1) * pgsz;

Click to expand...

what about if this mult above overflow?

sz += mem->max;

Click to expand...

what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0) return -1;"
so that each function can fail.
never exist one function can not fail

unsigned sz;
(int) sz < 0

Does this just check the high bit (assuming 2's complement)?

Seebs · May 12, 2011

unsigned sz;
(int) sz < 0

Does this just check the high bit (assuming 2's complement)?

Not necessarily.

If you're comfortable assuming 2s complement, I bet you can do better with:
sz > INT_MAX

-s

Seebs · May 12, 2011

I don't know,
but I don't see the relevance of ((int) sz < 0).

The relevance was in that question there.

On a typical 2s complement system, it is quite likely that if you convert
a value greater than INT_MAX but no larger than UINT_MAX to a signed integer
type, you will end up with a negative value. Thus,
sz > INT_MAX
and
(int) sz < 0
are both fancy ways of saying something much like:

(sz & ((UNIT_MAX) & ~(UINT_MAX >> 1))) != 0

(unless my lack of morning coffee made me get that wrong...)

-s

luser- -droog · May 12, 2011

luser- -droog said:
luser- -droog said:

"luser- -droog" <[email protected]> ha scritto nel messaggio
I'm trying to build a new memory allocation scheme for
my postscript interpreter to satisfy these requirements:
% pointer + offset + size + short tag <= 32bits
% 'garbage-collection'-ready (structure the heap for easy iteration)
% all memory is in one region for easy restore from disk
So I've written a prototype where the "pointer" is an index
into an extendable address table stored in the heap itself.
And it seems to work! But I don't trust it, you know?
I'd appreciate any comments the experts here could offer.

Click to expand...

[...]
/* grow memory by sz */
void growmem(mfile *mem, unsigned sz) {
unsigned char *tmp;
if (sz < pgsz) sz = pgsz;
else sz = (sz/pgsz + 1) * pgsz;
what about if this mult above overflow?
sz += mem->max;
what about if this sz += overflow?
i not understand well the remain (too much complex
for my little think)
but i think, it is better to see if sz overflow
and in input to write "if((int) sz <0) return -1;"
so that each function can fail.
never exist one function can not fail

Click to expand...

Click to expand...

unsigned sz;
(int) sz < 0

Click to expand...

Does this just check the high bit (assuming 2's complement)?

Click to expand...

I don't know,
but I don't see the relevance of ((int) sz < 0).

If you want to check for wrap around after (sz += mem->max), then

if (sz - mem->max > sz) ThenItWrapped;

I think that's closer to what io_x was suggesting.
For my current purposes sz will be no more than 8*USHORTMAX (if there
is such a thing). But mem-max itself could conceivably get pretty big.
(~2 gigs, right? assuming 32-bit words).

If I approach this limit, I think I can squeeze a little more life out
of this if I make mem->max and mem->used 64bits (since they cover the
whole space), but change the adresses to be relative to the table
location (mtabloc, above) instead of mem->base.

I'm eager to start building on top of this module, but I want to get
as close to "right" as is practical.

Seebs · May 12, 2011

But
why would you care about
whether or not an unsigned object
held a value which was greater than INT_MAX?

Reasons might include:
* Want to check whether high bit is set
* Want to be sure it's well-defined to convert it to int

-s

Peter Nilsson · May 13, 2011

unsigned sz;
(int) sz < 0

Conversion of an unsigned int value not in the range of int to
an int is unspecified in C90 and (to all intents and purposes)
undefined behaviour in C99.

Also note that UINT_MAX may be more than 2 * INT_MAX + 1.

Addition of two unsigned ints a and b will 'wrap' if and only
if (a > -1u - b) [i.e. mathematically a + b > UINT_MAX.]

luser- -droog · May 13, 2011

But
why would you care about
whether or not an unsigned object
held a value which was greater than INT_MAX?

Well, I don't, really. But the only response my code got
was a cryptic message from io_x containing this bit
of c as an example. So it caught my eye and I wondered:
what the heck is this trying to do?

I think the gist of what io_x was trying to say is:
don't trust your input values in a function;
check that they are all in a meaningful range.

I think he recommended this code as a quick-and-dirty
failsafe check for the validity of the sz parameter.
Hitting the high-bit would mean attempting to allocate
more than 1 gigabyte in my delicate little suite of
functions.

But he may have picked that parameter arbitrarily.
I'd be much more worried about overflowing the mem->max
or mem->used values.

Ben Bacarisse · May 13, 2011

Seebs said:
Not necessarily.

If you're comfortable assuming 2s complement, I bet you can do better with:
sz > INT_MAX

The assumption is not 2's complement as far as I can see. Your test
works in all of C's three permitted signed integer representations (how
can it not? -- it does not involve any signed integers). It fails if
unsigned int has more then one more value bit that signed int.

Ben Bacarisse · May 14, 2011

pete said:
Also if
unsigned int has less than one more value bit than signed int.

Yes. It would have been simpler just to say that it fails unless
unsigned int has exactly one more value bit than signed int.

Tim Rentsch · May 19, 2011

Peter Nilsson said:
Conversion of an unsigned int value not in the range of int to
an int is unspecified in C90 and (to all intents and purposes)
undefined behaviour in C99.

Correction: in both cases the behavior is implementation defined.
In C90 that is limited to producing an implementation-defined
value. C99 expands that just slightly to allow raising an
implementation-defined signal. This is different from UB both in
principle and in practice: in principle, because whatever occurs
must be documented; in practice, because the range of behaviors
that actually occur is (and always will be) much more well-behaved
than arbitrary undefined behavior. Invoking such behavior may not
be generally desirable, but it is not undefined behavior, not even
just to all intents and purposes.

Peter Nilsson · May 19, 2011

Tim Rentsch said:
Correction: in both cases the behavior is implementation defined.

So, unspecified but requiring the implementation to document its
choice.

In C90 that is limited to producing an implementation-defined
value. C99 expands that just slightly to allow raising an
implementation-defined signal. This is different from UB both
in principle
Perhaps.

and in practice:

Well, I suppose it's possible to do...

void bye(int sig) { _Exit(0); }

int main(void)
{
for (int sig = 0; sig < INT_MAX; sig++)
signal(sig, bye);
signal(INT_MAX, bye);
...safe and sound...
}

But that's not exactly practical on a 64-bit int machine.

Tim Rentsch · May 19, 2011

Peter Nilsson said:
So, unspecified but requiring the implementation to document its
choice.

Yes, that follows from the definition of implementation-defined
behavior. It's still important to make the distinction.

Well, I suppose it's possible to do...

void bye(int sig) { _Exit(0); }

int main(void)
{
for (int sig = 0; sig < INT_MAX; sig++)
signal(sig, bye);
signal(INT_MAX, bye);
...safe and sound...
}

But that's not exactly practical on a 64-bit int machine.

Oh come on, I know you're smarter than that. Because
the signal in question is implementation-defined, it
must be documented, and an appropriate signal call
can be written. More to the point, such calls are
necessary only if the program is going to run on
implementations that do in fact raise a signal in
these cases, and almost none do; simply checking
the documentation for the implementations that are
to be supported will reveal which ones need to set
a signal handler, and the most likely case is that
none will.

You seem to be assuming that it only makes sense to write
code that is guaranteed to be 100% portable for any
imaginable conforming implementation. Obviously that isn't
realistically practical for a program of any significant
size -- there are bound to be some rough edges here and
there. As non-portable transgressions go, converting an
out-of-range value to a signed type is among the least
likely to be problematic.

vim tricks for constructing header files	2	May 16, 2011
Splint warnings about dependent storage returned as implicitly only.	1	May 20, 2013
RSA implementation issues in public key pem loader function	0	May 21, 2025
Problem with offset based linked list	12	Aug 25, 2008
Virtual Machine implementation problem, Please help me to spot the bug	5	May 7, 2007
fslurp() critique	6	Sep 28, 2003
Any Ideas, please?	4	Feb 5, 2011
my little implementation of malloc	52	Jan 27, 2007

Would someone tell me if I'm on the wrong track?

luser- -droog

luser- -droog

luser- -droog

luser- -droog

Seebs

Seebs

luser- -droog

Seebs

Peter Nilsson

luser- -droog

Ben Bacarisse

Ben Bacarisse

Tim Rentsch

Peter Nilsson

Tim Rentsch

Ask a Question

Similar Threads

Members online

Forum statistics

Latest Threads