CPU simulator written in C

osmium · Jun 23, 2004

Dan Pop

OP:> >> I tried to search con google with no success.

Osmium:> >You are searching with the wrong target word. The proper word is
emulate,
POP:> You're splitting hairs. Try googling for "8051 simulator".

If you think you will shock me by revealing that people sometimes use the
wrong word, you have failed miserably.

When using something such as google, where the basic commodity is the right
word spelled properly, it is best to use the right word if you know it. If
you consider that hair splitting, feel free to continue to think so.

Dingo · Jun 24, 2004

0x8000 = -32768
0x8000 + 1 = 0x8001 = -32767
0x8001 + 1 = 0x8002 = -32766
...

Show me where adding 1 to a negative binary number will cause the low
order bits to "run backwards" compared to adding 1 to an unsigned binary
number.

Repeat your demonstration using sign and magnitude representation in
place of two's complement.

RoSsIaCrIiLoIA · Jun 24, 2004

For me it is difficult to write a portable and *fast* x86 CPU in C
(it has to execute an OS).
I'm a beginner, but I would 'solve' the problem in this way:

___________________________________
#include <stdio.h>
#include <stdint.h> /* or stddef don't remember for uintxx_t */

/* 16-bit register modelled as two independent 8-bit fields. */
struct r16{
uint8_t rl; /* presumably the low byte; uint8_t comes from <stdint.h>, which is C99 (not C89) */
uint8_t rh; /* presumably the high byte; NOTE(review): exact-width types are optional in C99, so not every CPU provides them */
}; /* on an x86 CPU the same register is used for both
signed
and unsigned calculation */

/* 32-bit register built from a struct r16 plus a further 16-bit field.
   NOTE(review): presumably ac is the low word and sn the high word -- confirm. */
struct r32{
struct r16 ac;
uint16_t sn;
};

/* Register storage: every object below has file scope. */
struct r32 eax_={0}, ebx_={0}, ecx_={0}, edx_={0};

/* Objects with static storage duration are zero-initialized even without
an explicit ={0}, so the registers below also start out as all zeros. */

struct r32 esi_, edi_, ebp_, esp_, eip_;
uint16_t cs_, ds_, es_, ss_, fs_, gs_, flags_;

/* Pointer aliases: each one points at the storage object of the same name. */
struct r32 *eax= &eax_, *ebx= &ebx_, *ecx= &ecx_, *edx= &edx_;
struct r32 *esi= &esi_, *edi= &edi_, *ebp= &ebp_, *esp= &esp_,
*eip = &eip_;
/* NOTE(review): "falgs" looks like a misspelling of "flags" -- confirm before renaming. */
uint16_t *cs=&cs_, *ds=&ds_, *es=&es_, *ss=&ss_, *fs=&fs_,
*gs=&gs_, *falgs=&flags_;

But I prefer this:

#include <stdio.h>
#include <stdint.h> /* or stddef don't remember for uintxx_t */

/* Register record holding a 32-bit, a 16-bit and two 8-bit fields.
   This is a struct, not a union: the four members occupy separate storage,
   so a write to one of them does not change the others. */
struct r32{
uint32_t e; /* 32-bit field */
uint16_t x; /* 16-bit field */
uint8_t h; /* 8-bit field */
uint8_t l; /* 8-bit field */
};

/* Register storage: every object below has file scope and starts zeroed. */
struct r32 eax_={0}, ebx_={0}, ecx_={0}, edx_={0};
struct r32 esi_, edi_, ebp_, esp_, eip_;
uint16_t cs_, ds_, es_, ss_, fs_, gs_, flags_;

/* Pointer aliases: each one points at the storage object of the same name. */
struct r32 *eax= &eax_, *ebx= &ebx_, *ecx= &ecx_, *edx= &edx_;
struct r32 *esi= &esi_, *edi= &edi_, *ebp= &ebp_, *esp= &esp_, *eip
= &eip_;
/* NOTE(review): "falgs" looks like a misspelling of "flags" -- confirm before renaming. */
uint16_t *cs=&cs_, *ds=&ds_, *es=&es_, *ss=&ss_, *fs=&fs_,
*gs=&gs_, *falgs=&flags_;

/* Sub-register names: each expands to a member of the matching struct r32,
   reached through the pointer alias, and is therefore an lvalue. */
#define ax eax->x
#define al eax->l
#define ah eax->h

#define bx ebx->x
#define bl ebx->l
#define bh ebx->h

#define cx ecx->x
#define cl ecx->l
#define ch ecx->h

#define dx edx->x
#define dl edx->l
#define dh edx->h

/* 16-bit names of the pointer/index registers. */
#define sp esp->x
#define bp ebp->x
#define si esi->x
#define di edi->x
#define ip eip->x
/* Shorthands for "unsigned" and "printf". */
#define U unsigned
#define P printf

/*
 * Store the 32-bit value b in register *a and refresh the narrower fields.
 *
 * The members of struct r32 are separate storage, so each one is recomputed
 * from b here:
 *   e = all 32 bits
 *   x = bits 0..15  (the low word, as ax is the low half of eax on x86)
 *   h = bits 8..15
 *   l = bits 0..7
 */
void assign(struct r32* a, uint32_t b)
{
    a->e = b;
    a->x = b & 0xFFFF;        /* low word; previously took the high word (b>>16) */
    a->h = (b >> 8) & 0xFF;
    a->l = b & 0xFF;
}

/*
 * Print the 32-bit field of register *a in lowercase hex (no prefix, no
 * newline) and flush stdout so the text appears immediately.
 *
 * The value is passed as unsigned long with %lx: %x expects an unsigned
 * argument, and converting a uint32_t above INT_MAX to int is
 * implementation-defined.
 */
void Pr(struct r32* a)
{
    printf("%lx", (unsigned long) a->e);
    fflush(stdout);
}

/*
 * "somma" = sum: add the 32-bit field of *b to that of *a and store the
 * result back into *a through assign(), which also refreshes a's other fields.
 * *b is left unchanged.
 */
void somma(struct r32* a,struct r32* b)
{
    uint32_t total = a->e + b->e;

    assign(a, total);
}

/*
 * Demo driver: load eax/ebx and print them, then add edx into ecx with
 * somma() and print both next to the sum computed directly by the compiler.
 */
int main(void)
{
    /* Load and show two registers. */
    assign(eax, 0xFEFEFEFE);
    assign(ebx, 0xFAFAFAFA);
    P("eax=");
    Pr(eax);
    P(" ebx=");
    Pr(ebx);
    P("\n");

    /* ecx = ecx + edx */
    assign(ecx, 50000);
    assign(edx, 512341);
    somma(ecx, edx);
    P("ecx=");
    Pr(ecx);
    P(" edx=");
    Pr(edx);
    P("\n");

    /* Reference value for comparison with ecx above. */
    printf("somma=%x", (int)(50000 + 512341) );
    return 0;
}

RoSsIaCrIiLoIA · Jun 24, 2004

But I prefer this:

#include <stdio.h>
#include <stdint.h> /* or stddef don't remember for uintxx_t */

struct r32{
uint32_t e;
uint16_t x;
uint8_t h;
uint8_t l;
};

/* all global */
struct r32 eax_={0}, ebx_={0}, ecx_={0}, edx_={0};
struct r32 esi_, edi_, ebp_, esp_, eip_;
uint16_t cs_, ds_, es_, ss_, fs_, gs_, flags_;

better this:

#include <stdio.h>
#include <stdint.h> /* or stddef don't remember for uintxx_t */

/* Register storage: plain unsigned integers with file scope, zero at start-up. */
uint32_t eax, ebx, ecx, edx;
uint32_t esi, edi, ebp, esp, eip;
uint16_t cs, ds, es, ss, fs, gs, flags;

/* Overlay of a 32-bit, a 16-bit and an 8-bit object sharing the same address.
   NOTE(review): which part of e is aliased by x and l depends on the host
   byte order; they are the low-order part only on a little-endian host. */
union u{
uint32_t e;
uint16_t x;
uint8_t l;
};

/*
 * Read-only views of the 32-bit registers, extracted with masks and shifts.
 * Every expansion is fully parenthesized so it can be used safely inside a
 * larger expression (e.g. "sp + 1" or "ip == 0xFFFF").
 */
#define ax (eax & 0xFFFF)
#define al (eax & 0xFF )
#define ah ( (eax >>8) & 0xFF )

#define bx (ebx & 0xFFFF)
#define bl (ebx & 0xFF )
#define bh ( (ebx >>8) & 0xFF )

#define cx (ecx & 0xFFFF)
#define cl (ecx & 0xFF )
#define ch ( (ecx >>8) & 0xFF )

#define dx (edx & 0xFFFF)
#define dl (edx & 0xFF )
#define dh ( (edx >>8) & 0xFF )

/* Low 16 bits of the pointer/index registers. */
#define sp (esp & 0xFFFF)
#define bp (ebp & 0xFFFF)
#define si (esi & 0xFFFF)
#define di (edi & 0xFFFF)
#define ip (eip & 0xFFFF)
/* Shorthands for "unsigned" and "printf". */
#define U unsigned
#define P printf

/*
 * Emulate a 16-bit increment ("inc ax") on the register stored in *a.
 * Bits 0..15 are incremented modulo 2^16; bits 16..31 are left unchanged.
 */
void inc_x(uint32_t* a)
{
    uint16_t r = (uint16_t)(*a & 0xFFFF);

    ++r; /* wraps from 0xFFFF to 0 */
    /* Clear the low word first, then merge the new value in.  The old form
       "*a & (mask | r)" could only clear bits, never set them. */
    *a = (*a & (uint32_t) 0xFFFF0000) | r;
}

/*
 * Emulate an 8-bit increment of the low byte ("inc al") on the register in *a.
 * Bits 0..7 are incremented modulo 256; bits 8..31 are left unchanged.
 */
void inc_l(uint32_t* a)
{
    uint8_t r = (uint8_t)(*a & 0xFF);

    ++r; /* wraps from 0xFF to 0 */
    /* Clear the low byte first, then merge the new value in. */
    *a = (*a & (uint32_t) 0xFFFFFF00) | r;
}

/*
 * Alternative "inc al": increment bits 0..7 of *a modulo 256, leaving the
 * upper 24 bits unchanged.
 *
 * Done with masks rather than by viewing *a through a union pointer, because
 * the byte that a union member aliases depends on the host byte order.
 */
void inc_l1(uint32_t* a)
{
    *a = (*a & (uint32_t) 0xFFFFFF00) | ((*a + 1) & 0xFF);
}

/*
 * Alternative "inc ax": increment bits 0..15 of *a modulo 2^16, leaving the
 * upper 16 bits unchanged.
 *
 * Done with masks rather than by viewing *a through a union pointer, because
 * the half that a union member aliases depends on the host byte order.
 */
void inc_x1(uint32_t* a)
{
    *a = (*a & (uint32_t) 0xFFFF0000) | ((*a + 1) & 0xFFFF);
}

/*
 * Emulate an 8-bit increment of the second byte ("inc ah") on the register
 * in *a.  Bits 8..15 are incremented modulo 256; all other bits are unchanged.
 */
void inc_h(uint32_t* a)
{
    uint8_t r = (uint8_t)((*a >> 8) & 0xFF);

    ++r; /* wraps from 0xFF to 0 */
    /* Clear bits 8..15 first, then merge the new byte in. */
    *a = (*a & (uint32_t) 0xFFFF00FF) | ((uint32_t) r << 8);
}

/*
 * Demo driver: print eax/ebx, then show the effect of each increment helper
 * on eax starting from 0xFFFFFFFF.
 *
 * Values are printed with %lx and an unsigned long cast, since %x expects an
 * unsigned argument and these values do not fit in a signed int.
 */
int main(void)
{
    eax = 0xFEFEFEFE; ebx = 0xFAFAFAFA;
    P("eax=0x%lx ebx=0x%lx\n", (unsigned long) eax, (unsigned long) ebx);
    ecx = 50000; edx = 512341;
    ecx += edx;
    printf("somma=0x%lx\n", 50000UL + 512341UL);

    eax = 0xFFFFFFFF;
    printf("eax=0x%lx ", (unsigned long) eax);
    inc_x(&eax); /* inc ax */
    P("inc ax -> eax=0x%lx ax=0x%lx\n", (unsigned long) eax, (unsigned long) ax);

    eax = 0xFFFFFFFF;
    printf("eax=0x%lx ", (unsigned long) eax);
    inc_x1(&eax); /* inc ax, alternative implementation */
    P("inc ax -> eax=0x%lx ax=0x%lx\n", (unsigned long) eax, (unsigned long) ax);

    eax = 0xFFFFFFFF;
    printf("eax=0x%lx ", (unsigned long) eax);
    inc_l(&eax); /* inc al */
    P("inc al -> eax=0x%lx al=0x%lx\n", (unsigned long) eax, (unsigned long) al);

    eax = 0xFFFFFFFF;
    printf("eax=0x%lx ", (unsigned long) eax);
    inc_h(&eax); /* inc ah */
    P("inc ah -> eax=0x%lx ah=0x%lx\n", (unsigned long) eax, (unsigned long) ah);

    return 0;
}

Alex Fraser · Jun 24, 2004

Case - said:
I didn't say what kind of int, so technically what you propose
is covered by my statement ;-)
;-)

Yes, you're right, the PC is best typed as unsigned. I'm not sure
about the registers. Values in registers are seen as 2-s complement
by instruction in at least some CPU's (e.g., MIPS has pairs of
similar instructions for singed and unsigned register operand).

It's straightforward to simulate a 2's complement CPU using unsigned
integers, with the semantics C guarantees. This is for the same reason that
2's complement is popular.

Alex

Dan Pop · Jun 24, 2004

In said:
Dan Pop

OP:> >> I tried to search con google with no success.

Osmium:> >You are searching with the wrong target word. The proper word is
emulate,
POP:> You're splitting hairs. Try googling for "8051 simulator".

If you think you will shock me by revealing that people sometimes use the
wrong word, you have failed miserably.

Many of these people were vendors of such products. But, of course,
you know better than them...

If you were capable to conceive the notion that you might be wrong, the
google exercise would have been quite useful to you.

Contrary to your narrow ideas, there are many levels of software
CPU simulation, serving different purposes, and none of them is less
entitled to the name "simulation" than the others:

- sub-transistor level simulation: used for checking the correctness of
the chip design.

- transistor level simulation: used for checking the correctness of the
CPU implementation.

- gate level simulation: used for checking the correctness of the CPU
design.

- register to register transfer level simulation: used to evaluate the
CPU design performance.

- functional simulation, kernel mode: used for developing operating
system kernels and freestanding applications.

- functional simulation, user mode: used for developing hosted
applications (much faster than kernel mode simulation, as the OS kernel
functionality is provided by the simulator, instead of having the
simulated processor execute kernel code).

The last two simulation levels are *also* called emulation. In the case
of some very popular embedded control CPUs, hardware emulation is often
used for software development purposes instead of the real processor,
because it has much better debugging capabilities.

The Linux to Itanium port was done using "ski", a functional simulator
developed by HP and gcc as a cross-compiler on x86 hardware.
The kernel people used ski in kernel mode, the glibc people used it in
user mode. Everything was ready by the time Intel produced the first
silicon prototypes of Itanium. I don't remember anyone calling ski
"emulator", although it wouldn't have been technically incorrect.

Dan

Case · Jun 24, 2004

Eric said:
Note that you must issue the occasional no-op when
using instructions of the first type, to give the singed
registers time to cool.

Is this some kind of joke?

Case

Case · Jun 24, 2004

Eric said:
Note that you must issue the occasional no-op when
using instructions of the first type, to give the singed
registers time to cool.

I've worked on a MIPS assembler for a short while, and know
all about NOPs in delay slots; but I can't remember to have
seen any code in that assembler generating cooling NOPs.

So, is this some kind of joke, or a cool feature?

Case

Dan Pop · Jun 24, 2004

In said:
I didn't say what kind of int, so technically what you propose
is covered by my statement ;-)

Nope, in C int is a synonym for signed int:

2 At least one type specifier shall be given in the declaration
specifiers in each declaration, and in the specifier-qualifier
list in each struct declaration and type name. Each list of
type specifiers shall be one of the following sets (delimited
by commas, when there is more than one set on a line); the type
specifiers may occur in any order, possibly intermixed with the
other declaration specifiers.
....
- int, signed, or signed int
....

Dan

Dan Pop · Jun 24, 2004

In said:
Yes, you're right, the PC is best typed as unsigned. I'm not sure
about the registers.

I am. Think hard about the properties of two's complement arithmetic and
the semantics of unsigned arithmetic in C.

Values in registers are seen as 2-s complement
by instruction in at least some CPU's (e.g., MIPS has pairs of
similar instructions for singed and unsigned register operand).

That's why you want unsigned, which nicely simulates two's complement
behaviour.

CISC CPUs using two's complement have only one ADD instruction and set
their flags according to the result (if the Carry flag is set, unsigned
overflow occurred, if the Overflow flag is set, signed overflow occurred).
If no flag is set, the result is correct if interpreted as both signed
and unsigned. Implementing this in C, using unsigned arithmetic, is left
as an exercise for the reader.

Dan

CBFalconer · Jun 24, 2004

Case said:
I've worked on a MIPS assembler for a short while, and know
all about NOPs in delay slots; but I can't remember to have
seen any code in that assembler generating cooling NOPs.

So, is this some kind of joke, or a cool feature?

It's a cool feature, else those singed registers are liable to
char, which would make them incapable of holding anything
passively unsinged.

CBFalconer · Jun 24, 2004

Dan said:
.... snip ...

Many of these people were vendors of such products. But, of
course, you know better than them...

^^^^
Aha! Finally caught in an English inaccuracy. They.

Eric Sosman · Jun 24, 2004

John said:
Huh?

0x8000 = -32768
0x8000 + 1 = 0x8001 = -32767
0x8001 + 1 = 0x8002 = -32766
...

Show me where adding 1 to a negative binary number will cause the low
order bits to "run backwards" compared to adding 1 to an unsigned binary
number.

It seems I neglected to mention that the machine used
signed magnitude representation for negative integers:

-32768 = 0x800000008000
-32768 + 1 = -32767 = 0x800000007FFF
-32767 + 1 = -32766 = 0x800000007FFE
...

Sorry for the omission. Honeywell 8200, IIRC, and I think
the year was 1968.

Dan Pop · Jun 24, 2004

In said:
^^^^
Aha! Finally caught in an English inaccuracy. They.

If this is my first English inaccuracy you've caught in one of my posts,
you must be *really* reading them with your brain firmly set into Neutral.

Given that I've never even tried to learn the proper English grammar and
that I'm posting quite a lot, there *must* be several English mistakes in
my daily output... (without even mentioning typos and omitted words)

Dan

RoSsIaCrIiLoIA · Jun 24, 2004

I have to really wonder about this design for a CPU emulator. Some
of the registers listed above share storage (for example, ax consists
of ah and al concatenated, and eax contains ax). Every time you
change one register (e.g. eax, ax, al, or ah), you have to change
all of them, or keep track of which one is more up to date. This
tends to make things slow. and bug-prone, if you're not careful.
Assembly code WILL occasionally make use of this fact, for example,
loading ah with 0 to do an unsigned-extension of al into ax may be
the fastest way to accomplish this.

On my PC, the "union r32" below behaves like an x86 general-purpose register.
Do you agree?

#include <stdio.h>
#include <stdint.h> /* or stddef don't remember for uintxx_t */

/*
 * One 32-bit register with overlapping 16-bit and 8-bit views.
 * All members of a union share storage, so a write through one view is
 * visible through the others.
 * NOTE(review): which bytes of val the w and b views alias depends on the
 * host byte order; they match x86's ax/al/ah only on a little-endian host
 * with no padding inside b -- confirm for the target.
 */
union r32
{
union
{
struct
{
uint8_t l; /* first byte of the storage */
uint8_t h; /* second byte of the storage */
}b;
uint16_t val; /* 16-bit view over the same two bytes */
}w;
uint32_t val; /* full 32-bit view */
};

/* Register storage: file-scope objects, zero at program start. */
union r32 eax_, ebx_, ecx_, edx_;
union r32 esi_, edi_, ebp_, esp_, eip_;
uint16_t cs, ds, es, ss, fs, gs, flags;

/*
 * Register names as lvalue views into the union r32 objects:
 *   eXX -> .val (32 bits), XX -> .w.val (16 bits), Xl/Xh -> .w.b.l / .w.b.h.
 */
#define eax (eax_.val)
#define ax (eax_.w.val)
#define al (eax_.w.b.l)
#define ah (eax_.w.b.h)

#define ebx (ebx_.val)
#define bx (ebx_.w.val)
#define bl (ebx_.w.b.l)
#define bh (ebx_.w.b.h)

#define ecx (ecx_.val)
#define cx (ecx_.w.val)
#define cl (ecx_.w.b.l)
#define ch (ecx_.w.b.h)

#define edx (edx_.val)
#define dx (edx_.w.val)
#define dl (edx_.w.b.l)
#define dh (edx_.w.b.h)

/* 16-bit views of the pointer/index registers.  These must select .w.val:
   .w on its own is the inner union, not a 16-bit integer. */
#define sp (esp_.w.val)
#define bp (ebp_.w.val)
#define si (esi_.w.val)
#define di (edi_.w.val)
#define ip (eip_.w.val)
/* Shorthands for "unsigned" and "printf". */
#define U unsigned
#define P printf

/* Increment the 16-bit view of register *a by one, wrapping modulo 2^16. */
void inc_w(union r32* a)
{
    a->w.val = (uint16_t)(a->w.val + 1);
}

/* Increment the b.l byte view of register *a by one, wrapping modulo 256. */
void inc_l(union r32* a)
{
    a->w.b.l = (uint8_t)(a->w.b.l + 1);
}

/* Increment the b.h byte view of register *a by one, wrapping modulo 256. */
void inc_h(union r32* a)
{
    a->w.b.h = (uint8_t)(a->w.b.h + 1);
}

int main(void)
{
eax = 0xFEFEFEFE; ebx = 0xFAFAFAFA;
P("eax=0x%x ebx=Ox%x\n", (int) eax, (int) ebx);
ecx=50000; edx=512341;
ecx += edx;
printf("somma=0x%x ecx=0x%x\n", (int)(50000 + 512341), (int) ecx );
eax=0xFFFFFFFF;
printf("eax=0x%x ", (int) eax );
inc_w(&eax); /* inc ax ; or I would have to write inc_x(&eax_) ? */
P("inc ax -> eax=0x%x ax=0x%x\n", (int) eax, (int) ax );

eax=0xFFFFFFFF;
printf("eax=0x%x ", (int) eax );
inc_l(&eax); /* inc al */
P("inc ax -> eax=0x%x al=0x%x\n", (int) eax, (int) al );

eax=0xFFFFFFFF;
printf("eax=0x%x ", (int) eax );
inc_h(&eax); /* inc ah */
P("inc ah -> eax=0x%x ah=0x%x\n", (int) eax, (int) ah );

return 0;
}

/*
eax=0xfefefefe ebx=Oxfafafafa
somma=0x894a5 ecx=0x894a5
eax=0xffffffff inc ax -> eax=0xffff0000 ax=0x0
eax=0xffffffff inc ax -> eax=0xffffff00 al=0x0
eax=0xffffffff inc ah -> eax=0xffff00ff ah=0x0
*/

RoSsIaCrIiLoIA · Jun 24, 2004

This would be portable.

#include <stdio.h>
#include <stdint.h> /* or stddef don't remember for uintxx_t */

/*
 * One 32-bit register with 8-, 16- and 32-bit members sharing one address.
 * NOTE(review): l and x alias the low-order part of val only on a
 * little-endian host, so this layout is not byte-order independent -- confirm
 * for the target.
 */
union r32{
uint8_t l; /* 8-bit view */
uint16_t x; /* 16-bit view */
uint32_t val; /* full 32-bit view */
};

/* Register storage: file-scope objects, zero at program start. */
union r32 eax_, ebx_, ecx_, edx_;
union r32 esi_, edi_, ebp_, esp_, eip_;
uint16_t cs, ds, es, ss, fs, gs, flags;

#define eax (eax_.val)
#define ax (eax_.x)
#define al (eax_.l)
#define ah ( (eax_.x>>8) & 0xFF ) /* not lvalue :-( */

#define ebx (ebx_.val)
#define bx (ebx_.x)
#define bl (ebx_.l)
#define bh ( (ebx_.x>>8) & 0xFF ) /* not lvalue */

#define ecx (ecx_.val)
#define cx (ecx_.x)
#define cl (ecx_.l)
#define ch ( (ecx_.x>>8) & 0xFF ) /* not lvalue */

#define edx (edx_.val)
#define dx (edx_.x)
#define dl (edx_.l)
#define dh ( (edx_.x>>8) & 0xFF ) /* not lvalue */

#define sp (esp_.x)
#define bp (ebp_.x)
#define si (esi_.x)
#define di (edi_.x)
#define ip (eip_.x)
#define U unsigned
#define P printf

/* Increment the 16-bit member x of register *a by one, wrapping modulo 2^16. */
void inc_x(union r32* a)
{
    a->x = (uint16_t)(a->x + 1);
}

/* Increment the 8-bit member l of register *a by one, wrapping modulo 256. */
void inc_l(union r32* a)
{
    a->l = (uint8_t)(a->l + 1);
}

/*
 * Emulate "inc ah": increment bits 8..15 of the register's 32-bit value
 * modulo 256, leaving every other bit unchanged.
 *
 * Works on a->val only, so the result does not depend on how the narrower
 * union members overlay it.
 */
void inc_h(union r32* a)
{
    uint8_t r = (uint8_t)((a->val >> 8) & 0xFF);

    ++r; /* wraps from 0xFF to 0 */
    /* Clear bits 8..15 first, then merge the new byte in.  The old form
       "val & (mask | r<<8)" could only clear bits, never set them. */
    a->val = (a->val & (uint32_t) 0xFFFF00FF) | ((uint32_t) r << 8);
}

int main(void)
{
eax = 0xFEFEFEFE; ebx = 0xFAFAFAFA;
P("eax=0x%x ebx=Ox%x\n", (int) eax, (int) ebx);
ecx=50000; edx=512341;
ecx += edx;
printf("somma=0x%x ecx=0x%x\n", (int)(50000 + 512341), (int) ecx );
eax=0xFFFFFFFF;
printf("eax=0x%x ", (int) eax );
inc_x(&eax); /* inc ax ; or I would have to write inc_x(&eax_) ? */
P("inc ax -> eax=0x%x ax=0x%x\n", (int) eax, (int) ax );

eax=0xFFFFFFFF;
printf("eax=0x%x ", (int) eax );
inc_l(&eax); /* inc al */
P("inc ax -> eax=0x%x al=0x%x\n", (int) eax, (int) al );

eax=0xFFFFFFFF;
printf("eax=0x%x ", (int) eax );
inc_h(&eax); /* inc ah */
P("inc ah -> eax=0x%x ah=0x%x\n", (int) eax, (int) ah );

return 0;
}

Case · Jun 24, 2004

Eric said:
Note that you must issue the occasional no-op when
using instructions of the first type, to give the singed
registers time to cool.

You are talking about signed registers. I was talking about the
way a CPU may 'see' the value contained in a register. And we
are talking in the context of mapping CPU to C. Evidently,
reading other posts, you have much much more experience in CPU
design issues. I don't get your point, could you please elaborate?

Case

Case - · Jun 24, 2004

Dan said:
Nope, in C int is a synonym for signed int:

I was writing English, not C.

Case

CBFalconer · Jun 24, 2004

Case said:
Eric Sosman wrote:
.... snip ...

You are talking about signed registers. I was talking about the

No, he's talking about singed registers.

Case · Jun 25, 2004

CBFalconer said:
... snip ...

No, he's talking about singed registers.

Sure. Would you care to tell me what singed registers are?
(It takes special effort to deliberately type ng.)

Case

Bare metal.	0	Sep 14, 2022
splinter web browser simulator causing constant BSODs	1	Apr 11, 2013
Boomer trying to learn coding in C and C++	6	Dec 16, 2022
Weird Behavior with Rays in C and OpenGL	4	Feb 12, 2024
Urgent ~~Simulator Question	5	Apr 10, 2004
tornado.web ioloop add_timeout eats CPU	3	Sep 3, 2012
How to try a range of hex values in C# code ?	0	Nov 19, 2022
13 year old C source code	14	Jul 28, 2011

CPU simulator written in C

osmium

Dingo

RoSsIaCrIiLoIA

RoSsIaCrIiLoIA

Alex Fraser

Dan Pop

Case

Case

Dan Pop

Dan Pop

CBFalconer

CBFalconer

Eric Sosman

Dan Pop

RoSsIaCrIiLoIA

RoSsIaCrIiLoIA

Case

Case -

CBFalconer

Case

Ask a Question

Similar Threads

Members online

Forum statistics

Latest Threads