Code Review Request: Newbie C programmer

P

Peter Karlsson

Hi gang,

My job description recently changed to include C coding, so I've got
to speed learn C. :) Anyway, I would be grateful for comments of
this little exercise I hacked up.

First, Splint 3.1.1 --- 13 Oct 2004, warns (among other things):

6502dis.c:21:5: Variable exported but not used outside 6502dis:
address
...
6502dis.c:33:5: Function exported but not used outside 6502dis:
getbyte
...

and so on. My question is, should I care about these warnings? Or,
better yet, how do I fix them? (The "other things" it complains
about is unistd.h, getopt.)

Second, regarding style. Is my error checking ok? Have I done any stupid
newbie things?

Anyway, I'd be very thankful for any critisism.



/* A simple 6502 disassembler
*/

/*
* Includes
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>

#define VERSION "0.1"

/*
* Global variables
*/
extern char *optarg;
extern int optind, opterr;
int address;
int ld;
char *filename;

/*
* typedefs
*/
typedef unsigned short int usint;

/*
* Mnemonics
*/
static const char *ASM[] = {
"BRK", "ORA", "???", "???", "???", "ORA", "ASL", "???",
"PHP", "ORA", "ASL", "???", "???", "ORA", "ASL", "???",
"BPL", "ORA", "???", "???", "???", "ORA", "ASL", "???",
"CLC", "ORA", "???", "???", "???", "ORA", "ASL", "???",
"JSR", "AND", "???", "???", "BIT", "AND", "ROL", "???",
"PLP", "AND", "ROL", "???", "BIT", "AND", "ROL", "???",
"BMI", "AND", "???", "???", "???", "AND", "ROL", "???",
"SEC", "AND", "???", "???", "???", "AND", "ROL", "???",
"RTI", "EOR", "???", "???", "???", "EOR", "LSR", "???",
"PHA", "EOR", "LSR", "???", "JMP", "EOR", "LSR", "???",
"BVC", "EOR", "???", "???", "???", "EOR", "LSR", "???",
"CLI", "EOR", "???", "???", "???", "EOR", "???", "???",
"RTS", "ADC", "???", "???", "???", "ADC", "ROR", "???",
"PLA", "ADC", "ROR", "???", "JMP", "ADC", "ROR", "???",
"BVS", "ADC", "???", "???", "???", "ADC", "ROR", "???",
"SEI", "ADC", "???", "???", "???", "ADC", "ROR", "???",
"???", "STA", "???", "???", "STY", "STA", "STX", "???",
"DEY", "???", "TXA", "???", "STY", "STA", "STX", "???",
"BCC", "STA", "???", "???", "STY", "STA", "???", "???",
"TYA", "STA", "TXS", "???", "???", "STA", "???", "???",
"LDY", "LDA", "LDX", "???", "LDY", "LDA", "LDX", "???",
"TAY", "LDA", "TAX", "???", "LDY", "LDA", "LDX", "???",
"BCS", "LDA", "???", "???", "LDY", "LDA", "LDX", "???",
"CLV", "LDA", "TSX", "???", "LDY", "LDA", "LDX", "???",
"CPY", "CMP", "???", "???", "CPY", "CMP", "DEC", "???",
"INY", "CMP", "DEX", "???", "CPY", "CMP", "DEC", "???",
"BNE", "CMP", "???", "???", "???", "CMP", "DEC", "???",
"CLD", "CMP", "???", "???", "???", "CMP", "DEC", "???",
"CPX", "SBC", "???", "???", "CPX", "SBC", "INC", "???",
"INX", "SBC", "NOP", "???", "CPX", "SBC", "INC", "???",
"BEQ", "SBC", "???", "???", "???", "SBC", "INC", "???",
"SED", "SBC", "???", "???", "???", "SBC", "INC", "???"
};

/*
* Size of mnemonics
*/
static const usint ASMSIZE[256] = {
1, 2, 1, 1, 1, 2, 2, 1,
1, 2, 1, 1, 1, 3, 3, 1,
2, 2, 1, 1, 1, 2, 2, 1,
1, 3, 1, 1, 1, 3, 3, 1,
3, 2, 1, 1, 2, 2, 2, 1,
1, 2, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 1, 2, 2, 1,
1, 3, 1, 1, 1, 3, 3, 1,
1, 2, 1, 1, 1, 2, 3, 1,
1, 2, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 1, 2, 2, 1,
1, 3, 1, 1, 1, 3, 1, 1,
1, 2, 1, 1, 1, 2, 2, 1,
1, 2, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 1, 2, 2, 1,
1, 3, 1, 1, 1, 3, 3, 1,
1, 2, 1, 1, 2, 2, 2, 1,
1, 1, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 2, 2, 1, 1,
1, 3, 1, 1, 1, 3, 1, 1,
2, 2, 2, 1, 2, 2, 2, 1,
1, 2, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 2, 2, 2, 1,
1, 3, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 2, 2, 2, 1,
1, 2, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 1, 2, 2, 1,
1, 3, 1, 1, 1, 3, 3, 1,
2, 2, 1, 1, 2, 2, 2, 1,
1, 2, 1, 1, 3, 3, 3, 1,
2, 2, 1, 1, 1, 2, 2, 1,
1, 3, 1, 1, 1, 3, 3, 1
};

/* Kind of mnemonic
* 1: #$nn
* 2: $nnnn
* 3: $nnnn,x
* 4: $nnnn,y
* 5: $nn
* 6: ($nn,x)
* 7: ($nn),y
* 8: $nn,x
* 9: ($nnnn)
* 10: $nn,y
* 11: branches
*/
static const usint ASMMODE[256] = {
0, 6, 0, 0, 0, 5, 5, 0,
0, 1, 0, 0, 0, 2, 2, 0,
11,7, 0, 0, 0, 8, 8, 0,
0, 4, 0, 0, 0, 3, 3, 0,
2, 6, 0, 0, 5, 5, 5, 0,
0, 1, 0, 0, 2, 2, 2, 0,
11,7, 0, 0, 0, 8, 8, 0,
0, 4, 0, 0, 0, 3, 3, 0,
0, 6, 0, 0, 0, 5, 3, 0,
0, 1, 0, 0, 2, 2, 2, 0,
11,7, 0, 0, 0, 8, 8, 0,
0, 4, 0, 0, 0, 3, 0, 0,
0, 6, 0, 0, 0, 5, 5, 0,
0, 1, 0, 0, 9, 2, 2, 0,
11,7, 0, 0, 0, 8, 8, 0,
0, 4, 0, 0, 0, 3, 3, 0,
0, 6, 0, 0, 5, 5, 5, 0,
0, 0, 0, 0, 2, 2, 2, 0,
11,7, 0, 0, 8, 8, 0, 0,
0, 4, 0, 0, 0, 3, 0, 0,
1, 6, 1, 0, 5, 5, 5, 0,
0, 1, 0, 0, 2, 2, 2, 0,
11,7, 0, 0, 8, 8, 10,0,
0, 4, 0, 0, 3, 3, 4, 0,
1, 6, 0, 0, 5, 5, 5, 0,
0, 1, 0, 0, 2, 2, 2, 0,
11,7, 0, 0, 0, 8, 8, 0,
0, 4, 0, 0, 0, 3, 3, 0,
1, 6, 0, 0, 5, 5, 5, 0,
0, 1, 0, 0, 2, 2, 2, 0,
11,7, 0, 0, 0, 8, 8, 0,
0, 4, 0, 0, 0, 3, 3, 0
};

/*
* Get options
*/
void parse_options(int argc, char **argv)
{
int optchar;

while ((optchar = getopt(argc, argv, "a:h")) >= 0) {
switch (optchar) {
case 'a':
address = atoi(optarg);
ld = 1;
break;

case 'h':
(void) fputs("6502dis " VERSION " (" __DATE__ ")\n", stderr);
(void) fputs("Usage: 6502dis [options] inputfile\n"
"Options are:\n"
" -a addr decimal loading address\n"
" -h this help text\n", stderr);
exit(EXIT_SUCCESS);

default:
exit(EXIT_FAILURE);
}
}

if (!(filename = argv[optind])) {
(void) fputs("Error: no input file\n", stderr);
exit(EXIT_FAILURE);
}

return;
}

/*
* Get a byte from file
*/
int getbyte(FILE * fsource)
{
int opc;

if ((opc = fgetc(fsource)) == EOF) {
printf("EOF\n");
(void) fclose(fsource);
exit(EXIT_SUCCESS);
}

return (opc);
}

/*
* main
*/
int main(int argc, char **argv)
{
FILE *fsource;
int adr, adr_low, adr_high, opcode = 0, low = 0, high = 0;
usint size, mode;

address = 0;
ld = 0;
parse_options(argc, argv);

if (!(fsource = fopen(filename, "rb"))) {
fprintf(stderr, "Error: could not open: %s\n", filename);
exit(EXIT_FAILURE);
}

if (ld == 0) {
adr_low = getbyte(fsource);
adr_high = getbyte(fsource);
address = (adr_high * 256) + adr_low;
}

printf("Filename: %s\n", filename);
printf("Loading address: %d ($%04X)\n\n", address, (usint) address);

while (opcode > -1) {
opcode = getbyte(fsource);
printf("$%04X ", (usint) address);

size = ASMSIZE[opcode];
switch (size) {
case 2:
low = getbyte(fsource);
address++;
address++;
printf("%02X %02X ", (usint) opcode, (usint) low);
break;

case 3:
low = getbyte(fsource);
high = getbyte(fsource);
address++;
address++;
address++;
printf("%02X %02X %02X ", (usint) opcode, (usint) low,
(usint) high);
break;

default:
address++;
printf("%02x ", (usint) opcode);
break;
}

printf("%s ", ASM[opcode]);

mode = ASMMODE[opcode];
switch (mode) {
case 1:
printf("#$%02X", (usint) low);
break;

case 2:
printf("$%02X%02X", (usint) high, (usint) low);
break;

case 3:
printf("$%02X%02X,X", (usint) high, (usint) low);
break;

case 4:
printf("$%02X%02X,Y", (usint) high, (usint) low);
break;

case 5:
printf("$%02X", (usint) low);
break;

case 6:
printf("($%02X,X)", (usint) low);
break;

case 7:
printf("($%02X),Y", (usint) low);
break;

case 8:
printf("$%02X,X", (usint) low);
break;

case 9:
printf("($%02X%02X)", (usint) high, (usint) low);
break;

case 10:
printf("$%02X,Y", (usint) low);
break;

case 11:
if (low <= 127)
adr = address + low;
else
adr = address - (256 - low);

printf("$%04X", (usint) adr);
break;

default:
break;
}
printf("\n");
}

(void) fclose(fsource);
exit(EXIT_SUCCESS);
}


Best regards,
Peter Karlsson
 
C

Chris Barts

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Peter Karlsson wrote:
[bobbit]
| First, Splint 3.1.1 --- 13 Oct 2004, warns (among other things):
|
| 6502dis.c:21:5: Variable exported but not used outside 6502dis:
| address
| ...
| 6502dis.c:33:5: Function exported but not used outside 6502dis:
| getbyte
| ...
|
| and so on. My question is, should I care about these warnings?
[bobbit]

Probably not. They simply state that a variable and a function have
external linkage but are only used by one module.

| Or, better yet, how do I fix them?

By adding `static' to the declarations of the variable and the function.
(Or to the function's signature if it isn't declared before definition.)
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFBcHPTBsEyCHtFPhsRAoroAJ9BMr05eDSfItsSRfaXDX2GEfoR1ACdEzwl
RFAI18SsXftFqHn8Uj14tak=
=9Q7v
-----END PGP SIGNATURE-----
 
P

Peter Karlsson

[re: splint warnings]
| Or, better yet, how do I fix them?

By adding `static' to the declarations of the variable and the function.
(Or to the function's signature if it isn't declared before definition.)

I see. Thank you very much for your input.

Best regards,
Peter Karlsson
 
S

Spiro Trikaliotis

Hello Peter,


nice to meet you here. ;-)

First, Splint 3.1.1 --- 13 Oct 2004, warns (among other things):

6502dis.c:21:5: Variable exported but not used outside 6502dis:
address
...
6502dis.c:33:5: Function exported but not used outside 6502dis:
getbyte
...

Well, what would someone like you write as an exercise? A 6502
disassembler? ;-)
and so on. My question is, should I care about these warnings? Or,
better yet, how do I fix them? (The "other things" it complains
about is unistd.h, getopt.)

Well, these warnings are not serious. Chris Barts description is totally
correct.

Anyway, if you want to omit them (which is not bad, anyway, so you can
have a better lock at the relevant warnings), you can do the following:

Replace

int address;

by

static int address;

With the "static", you tell the tool chain that this variable should
have internal linkage, that is, it is not visible from the outside. The
same goes for


int getbyte(FILE * fsource)

which you can replace with

static int getbyte(FILE * fsource)

Obviously, anything that needs to be visible by other files (for the
linker) has to be defined without the "static".

Furthermore, be careful: A "static" before a variable inside of a
function has a different meaning!

HTH,
Spiro.
 
C

CBFalconer

Peter said:
My job description recently changed to include C coding, so I've got
to speed learn C. :) Anyway, I would be grateful for comments of
this little exercise I hacked up.

First, Splint 3.1.1 --- 13 Oct 2004, warns (among other things):

6502dis.c:21:5: Variable exported but not used outside 6502dis:
address
...
6502dis.c:33:5: Function exported but not used outside 6502dis:
getbyte
...

and so on. My question is, should I care about these warnings? Or,
better yet, how do I fix them? (The "other things" it complains
about is unistd.h, getopt.)

About the 'other things' read the splint documentation. There are
ways to handle such non-standard includes.

Yes, you should care. Failure to protect against this means that
outside code (i.e. in another compilation unit) can unexpectedly
modify your variables or call your functions at probably awkward
times. The cure is atrociously simple - mark them as being
static. This also prevents the maintenance programmer having to
search myriad files to determine if he can make changes in such
variable or functions.
 
P

Peter Karlsson

[snip]
The cure is atrociously simple - mark them as being static. This also
prevents the maintenance programmer having to search myriad files to
determine if he can make changes in such variable or functions.

Yes, I've learned a useful lesson regarding "static". Thank you very
much for your time and input.

Best regards,
Peter Karlsson
 
R

Richard Bos

Peter Karlsson said:
My job description recently changed to include C coding, so I've got
to speed learn C. :) Anyway, I would be grateful for comments of
this little exercise I hacked up.
and so on. My question is, should I care about these warnings? Or,
better yet, how do I fix them? (The "other things" it complains
about is unistd.h, getopt.)

getopt() is not ISO C, so I won't comment about it in this group.
/*
* Global variables
*/

Why these comments? Is it not plain that the following are global
variables, typedefs, and whatnot, without them?
/*
* Mnemonics
*/

_This_ is a useful comment. It tells me something which is obvious from
the source only if I've learned assembly before.
static const char *ASM[] = {
"BRK", "ORA", "???", "???", "???", "ORA", "ASL", "???",
"PHP", "ORA", "ASL", "???", "???", "ORA", "ASL", "???",
/* Kind of mnemonic
* 1: #$nn
* 2: $nnnn
* 3: $nnnn,x
* 4: $nnnn,y
* 5: $nn
* 6: ($nn,x)
* 7: ($nn),y
* 8: $nn,x
* 9: ($nnnn)
* 10: $nn,y
* 11: branches
*/

And this is a very useful comment, but it would've been even clearer to
use #defined constants or an enum for these.
void parse_options(int argc, char **argv)
{
address = atoi(optarg);

strtol() and strtoul() are safer, because your program won't invoke
undefined behaviour when I call it using "program 28795427897895438795".
atoi() causes UB when the number it tries to read overflows; strto*() do
not. In addition, they allow you to check for errors more easily.
case 'h':
(void) fputs("6502dis " VERSION " (" __DATE__ ")\n", stderr);

Ow... casts to void. If that lint you're using suggested you might need
them, it's antediluvian - get rid of it! Those casts are completely
unnecessary, and in fact make the code less readable.
(void) fputs("Usage: 6502dis [options] inputfile\n"
"Options are:\n"
" -a addr decimal loading address\n"
" -h this help text\n", stderr);
exit(EXIT_SUCCESS);

default:
exit(EXIT_FAILURE);

Style point, but I prefer to see a help text whenever I use any
non-supported option. I might try "program -?" or just "program" first.
Even better, but more complicated, is to try and find out _what_ went
wrong on the command line, and give an appropriate message; for example,
when I type "command -a filename", it would be nice to get a message
telling me explicitly that -a needs an address, not a filename.
int getbyte(FILE * fsource)
{
int opc;

if ((opc = fgetc(fsource)) == EOF) {
printf("EOF\n");
(void) fclose(fsource);
exit(EXIT_SUCCESS);

This is a bit drastic. It's rarely a good idea to let your low-level
input functions decide what to do when they're out of input - let the
high-level functions handle that.

Richard
 
P

Peter Karlsson

[snip very helpful tips]

Thank you very much for your time and comments Richard!
This was very helpful indeed.

Best regards,
Peter Karlsson
 
P

Peter Karlsson

I've got a question.

[snip]
This is a bit drastic. It's rarely a good idea to let your low-level
input functions decide what to do when they're out of input - let the
high-level functions handle that.

I understand that the error checking should be higher level. But, in
this case, where I call this function in many different places, it's
hard to optimize the code. That is having a bunch of "if error...".

I suppose I could make another function, "do_error()", or something to
that effect to minimize code duplication...

Hmmm...lots of thinking to do! :)


Best regards,
Peter Karlsson
 
R

Richard Bos

Peter Karlsson said:
I've got a question.

[snip]
This is a bit drastic. It's rarely a good idea to let your low-level
input functions decide what to do when they're out of input - let the
high-level functions handle that.

I understand that the error checking should be higher level. But, in
this case, where I call this function in many different places, it's
hard to optimize the code. That is having a bunch of "if error...".

I suppose I could make another function, "do_error()", or something to
that effect to minimize code duplication...

Another possible approach would be not to read one operator, print
something, read the operands, print something, etc., but to read an
entire assembly command in one go, and then print either an error
message or the description and code for that command.

Richard
 
A

Alan Balmer

Why these comments? Is it not plain that the following are global
variables, typedefs, and whatnot, without them?

Not the only reason for comments :) I've worked for shops which
require such comments as part of a standard heading, and I've used
tools which used such comments to locate sections of the code.
 
P

Peter Karlsson

[snip]
Another possible approach would be not to read one operator, print
something, read the operands, print something, etc., but to read an
entire assembly command in one go, and then print either an error
message or the description and code for that command.

That's a good idea. I'll try it out.


Thanks,
Peter Karlsson
 
O

Old Wolf

Why these comments? Is it not plain that the following are global
variables, typedefs, and whatnot, without them?

It is more like a bookmark, or the header of a set of tabsheets.

In some projects I work on, each file starts off with
the includes, then sections "Defines", "Macro Defines",
"Static Variables" (and/or "External Variables"),
"Function Declarations", then finally "Functions".

This can make the file more readable and orderly. If I go and
work on another project where I didn't do something like this,
then the first 1 or 2 screensful of code can be a mishmash
of various declarations and it can take a few moments to
sort out what is going on, or to find a particular declaration
that I am looking for, etc.
 
C

CBFalconer

Old said:
It is more like a bookmark, or the header of a set of tabsheets.

In some projects I work on, each file starts off with
the includes, then sections "Defines", "Macro Defines",
"Static Variables" (and/or "External Variables"),
"Function Declarations", then finally "Functions".

This can make the file more readable and orderly. If I go and
work on another project where I didn't do something like this,
then the first 1 or 2 screensful of code can be a mishmash
of various declarations and it can take a few moments to
sort out what is going on, or to find a particular declaration
that I am looking for, etc.

Sounds as if someone left the door open to the Cobolists pen.

--
"I support the Red Sox and any team that beats the Yankees"
"Any baby snookums can be a Yankee fan, it takes real moral
fiber to be a Red Sox fan"
"I listened to Toronto come back from 3:0 in '42, I plan to
watch Boston come back from 3:0 in 04"
 
R

Richard Bos

CBFalconer said:
Sounds as if someone left the door open to the Cobolists pen.

I don't know any Cobol, but it smells like Pascal to me. All functions
together, all types together, all variables together. It works well
enough for a small program, but for larger projects, it's much clearer
(well, IMO, anyway) to group declarations by function rather than by
syntactical class. That is, all declarations belonging to the linked
list go together, both the typedefs and the functions; then all
declarations for IO; then all the declarations for the bignum functions;
and so forth.

Richard
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,931
Messages
2,570,085
Members
46,536
Latest member
keelop

Latest Threads

Top