A
alan
Hello world,
I'm trying to implement a (hopefully portable!) tagged pointer class.
Basically, I have my own allocator which will ensure alignment at 8-
byte boundaries (where "byte" is "size of a char"), and allocates
objects of type Generic. My tagged pointer class will support either
Generic* (tag = 0b000) or small integers (tag=0b001) for now (in the
future maybe add unicode chars, and/or Cons* etc.).
A rough sketch of what I intend to do:
// Forward declaration: this sketch only handles Generic through pointers.
class Generic;
/**
 * A single tagged word that holds either a Generic* (low three bits 000)
 * or a small integer (low three bits 001).
 *
 * The value is kept in a union so the external helpers can read it either
 * as a pointer (dat.ptr) or as an integer (dat.num).
 */
class Object {
// Public for now; this will be made private, with the external
// functions declared as friends, later.
public:
    union types {
        void* ptr;
        // A long rather than an int: where long and void* have the same
        // size (ILP32, LP64) both members cover exactly the same bytes, so
        // the low-order tag bits are visible through either member
        // regardless of byte order.
        // NOTE(review): reading the member that was not last written relies
        // on the compiler supporting union type-punning (gcc documents it);
        // confirm for other compilers.
        long num;
    };
    types dat;

    /** Stores a raw pointer value; no tag bits are added or checked here. */
    Object(void* x) {
        // If the integer view is wider than a pointer, clear it first so
        // the bytes the pointer does not cover hold a known value.
        if(sizeof(dat.num) > sizeof(dat.ptr)) {
            dat.num = 0;
        }
        dat.ptr = x;
    }

    /** Stores a raw integer value; no tag bits are added or checked here. */
    Object(int x) {
        // Mirror image of the pointer constructor: clear the pointer view
        // first when it is the wider of the two.
        if(sizeof(dat.ptr) > sizeof(dat.num)) {
            dat.ptr = 0;
        }
        dat.num = x;
    }

    /**
     * Builds a small-integer Object: x shifted left by three bits with the
     * tag 0x1 in the low bits. The top three bits of x are discarded, so x
     * must fit in three bits fewer than an int.
     */
    static inline Object smallint(int x) {
        // Shift in unsigned arithmetic: left-shifting a negative int is
        // undefined behaviour before C++20.
        unsigned int tagged = (((unsigned int) x) << 3) | 0x1u;
        return Object((int) tagged);
    }

    /**
     * Builds a pointer Object from x.
     * Throws AlignmentError if any of the low three bits of x are set.
     */
    static inline Object obj(Generic* x) {
        // Bitwise & is not defined on pointers, so test the low bits of the
        // pointer's integer value instead.
        // NOTE(review): unsigned long is narrower than a pointer on LLP64
        // targets (e.g. 64-bit Windows); a pointer-sized integer type would
        // be needed there.
        if(((unsigned long)(void*) x) & 0x7) throw AlignmentError();
        // The pointer tag is 0, so adding it leaves the address unchanged.
        char* tmp = ((char*)(void*) x) + 0x0;
        return Object((void*) tmp);
    }
};
// Returns the low three bits (the tag) of x.
// When a pointer is wider than an int the bits are taken from dat.ptr,
// otherwise from dat.num.
// NOTE(review): this presumes the member being read overlays the low-order
// bits of whichever member was last written; verify on big-endian targets
// where the two union members differ in size.
static inline char tag(Object x) {
    const bool pointer_is_wider = sizeof(void*) > sizeof(int);
    if(!pointer_is_wider) {
        return (char)(x.dat.num & 0x7);
    }
    return (char)(((long) x.dat.ptr) & 0x7);
}
// True when x carries the small-integer tag (0x1).
static inline bool is_smallint(Object x) {
    const char bits = tag(x);
    return bits == 0x1;
}
// True when x carries the object-pointer tag (0x0).
static inline bool is_obj(Object x) {
    const char bits = tag(x);
    return bits == 0x0;
}
// Extracts the integer stored in a small-integer Object.
// Throws TypeError if x does not carry the small-integer tag.
static inline int as_smallint(Object x) {
    if(!is_smallint(x)) throw TypeError();
    // Object::smallint stores k as 8*k + 1, so subtracting the tag leaves an
    // exact multiple of 8 and the division has no remainder. This avoids
    // right-shifting a negative value, whose result is implementation-defined
    // before C++20.
    return (int)((x.dat.num - 1) / 8);
}
// Extracts the Generic* stored in a pointer Object.
// Throws TypeError if x does not carry the object-pointer tag.
static inline Generic* as_obj(Object x) {
    if(is_obj(x)) {
        // The pointer tag is 0, so subtracting it leaves the address as is.
        char* untagged = ((char*) x.dat.ptr) - 0x0;
        return (Generic*)(void*)untagged;
    }
    throw TypeError();
}
I've tested a version of the above code in a 32-bit x86 GNU/Linux
system with gcc, but I wonder if it's portable, say to big-endian
machines. Also, if optimizations are turned on (-O), it seems to
generate code which approximately looks like what I would expect for
explicit tagged pointers.
I would prefer to use a class-based solution for cleanliness, but I'm
concerned about using unions of potentially differently-sized objects,
especially if the code ends up in a big-endian system with a different
size for pointers and int.
I could also try to dig up the type I once saw that is supposed to be
the smallest integral type that can hold a void*, and use that instead
of the union.
Any pointers and suggestions, as well as analyses on how well this
might perform on various systems/compilers are welcome.
Sincerely,
AmkG
I'm trying to implement a (hopefully portable!) tagged pointer class.
Basically, I have my own allocator which will ensure alignment at 8-
byte boundaries (where "byte" is "size of a char"), and allocates
objects of type Generic. My tagged pointer class will support either
Generic* (tag = 0b000) or small integers (tag=0b001) for now (in the
future maybe add unicode chars, and/or Cons* etc.).
A rough sketch of what I intend to do:
// Forward declaration: this sketch only handles Generic through pointers.
class Generic;
/**
 * A single tagged word that holds either a Generic* (low three bits 000)
 * or a small integer (low three bits 001).
 *
 * The value is kept in a union so the external helpers can read it either
 * as a pointer (dat.ptr) or as an integer (dat.num).
 */
class Object {
// Public for now; this will be made private, with the external
// functions declared as friends, later.
public:
    union types {
        void* ptr;
        // A long rather than an int: where long and void* have the same
        // size (ILP32, LP64) both members cover exactly the same bytes, so
        // the low-order tag bits are visible through either member
        // regardless of byte order.
        // NOTE(review): reading the member that was not last written relies
        // on the compiler supporting union type-punning (gcc documents it);
        // confirm for other compilers.
        long num;
    };
    types dat;

    /** Stores a raw pointer value; no tag bits are added or checked here. */
    Object(void* x) {
        // If the integer view is wider than a pointer, clear it first so
        // the bytes the pointer does not cover hold a known value.
        if(sizeof(dat.num) > sizeof(dat.ptr)) {
            dat.num = 0;
        }
        dat.ptr = x;
    }

    /** Stores a raw integer value; no tag bits are added or checked here. */
    Object(int x) {
        // Mirror image of the pointer constructor: clear the pointer view
        // first when it is the wider of the two.
        if(sizeof(dat.ptr) > sizeof(dat.num)) {
            dat.ptr = 0;
        }
        dat.num = x;
    }

    /**
     * Builds a small-integer Object: x shifted left by three bits with the
     * tag 0x1 in the low bits. The top three bits of x are discarded, so x
     * must fit in three bits fewer than an int.
     */
    static inline Object smallint(int x) {
        // Shift in unsigned arithmetic: left-shifting a negative int is
        // undefined behaviour before C++20.
        unsigned int tagged = (((unsigned int) x) << 3) | 0x1u;
        return Object((int) tagged);
    }

    /**
     * Builds a pointer Object from x.
     * Throws AlignmentError if any of the low three bits of x are set.
     */
    static inline Object obj(Generic* x) {
        // Bitwise & is not defined on pointers, so test the low bits of the
        // pointer's integer value instead.
        // NOTE(review): unsigned long is narrower than a pointer on LLP64
        // targets (e.g. 64-bit Windows); a pointer-sized integer type would
        // be needed there.
        if(((unsigned long)(void*) x) & 0x7) throw AlignmentError();
        // The pointer tag is 0, so adding it leaves the address unchanged.
        char* tmp = ((char*)(void*) x) + 0x0;
        return Object((void*) tmp);
    }
};
// Returns the low three bits (the tag) of x.
// When a pointer is wider than an int the bits are taken from dat.ptr,
// otherwise from dat.num.
// NOTE(review): this presumes the member being read overlays the low-order
// bits of whichever member was last written; verify on big-endian targets
// where the two union members differ in size.
static inline char tag(Object x) {
    const bool pointer_is_wider = sizeof(void*) > sizeof(int);
    if(!pointer_is_wider) {
        return (char)(x.dat.num & 0x7);
    }
    return (char)(((long) x.dat.ptr) & 0x7);
}
// True when x carries the small-integer tag (0x1).
static inline bool is_smallint(Object x) {
    const char bits = tag(x);
    return bits == 0x1;
}
// True when x carries the object-pointer tag (0x0).
static inline bool is_obj(Object x) {
    const char bits = tag(x);
    return bits == 0x0;
}
// Extracts the integer stored in a small-integer Object.
// Throws TypeError if x does not carry the small-integer tag.
static inline int as_smallint(Object x) {
    if(!is_smallint(x)) throw TypeError();
    // Object::smallint stores k as 8*k + 1, so subtracting the tag leaves an
    // exact multiple of 8 and the division has no remainder. This avoids
    // right-shifting a negative value, whose result is implementation-defined
    // before C++20.
    return (int)((x.dat.num - 1) / 8);
}
// Extracts the Generic* stored in a pointer Object.
// Throws TypeError if x does not carry the object-pointer tag.
static inline Generic* as_obj(Object x) {
    if(is_obj(x)) {
        // The pointer tag is 0, so subtracting it leaves the address as is.
        char* untagged = ((char*) x.dat.ptr) - 0x0;
        return (Generic*)(void*)untagged;
    }
    throw TypeError();
}
I've tested a version of the above code in a 32-bit x86 GNU/Linux
system with gcc, but I wonder if it's portable, say to big-endian
machines. Also, if optimizations are turned on (-O), it seems to
generate code which approximately looks like what I would expect for
explicit tagged pointers.
I would prefer to use a class-based solution for cleanliness, but I'm
concerned about using unions of potentially differently-sized objects,
especially if the code ends up in a big-endian system with a different
size for pointers and int.
I could also try to dig up the type I once saw that is supposed to be
the smallest integral type that can hold a void*, and use that instead
of the union.
Any pointers and suggestions, as well as analyses on how well this
might perform on various systems/compilers are welcome.
Sincerely,
AmkG