Portably determine endianness?

A

Army1987

Is there anything wrong with this (assuming sizeof(int) < 10,
or '9' + 1, '9' + 2, ..., '0' + sizeof(int) are reasonable
characters, and bits aren't "shuffled" all around)?

#include <stdio.h>
#include <string.h>
#include <limits.h>
/*
 * Report the byte order of unsigned int.
 *
 * Builds a value whose bytes, from most to least significant, hold the
 * characters '1', '2', ... and then inspects its object representation:
 * if the characters appear in that order in memory the type is
 * big-endian, if they appear exactly reversed it is little-endian, and
 * anything else is reported as unknown.
 *
 * As stated in the accompanying post, this assumes that
 * '1' + sizeof(int) - 1 is still a reasonable character and that the
 * value bits are not scattered across the bytes.
 */
int main(void)
{
    if (sizeof(int) > 1) {
        unsigned int num = 0;
        unsigned char repr[sizeof num];
        unsigned int placeholder = '1';
        size_t i;
        enum {unknown, little, big} endianness = big;

        /* Most significant byte receives '1', the next one '2', etc. */
        for (i = 0; i < sizeof num; i++) {
            num <<= CHAR_BIT;
            num |= placeholder++;
        }
        memcpy(repr, &num, sizeof num);

        /* Big-endian: memory order matches significance order. */
        placeholder = '1';
        for (i = 0; i < sizeof num && endianness == big; i++) {
            if (repr[i] != placeholder++) {
                endianness = little;
            }
        }

        /* Otherwise it is little-endian only if exactly reversed. */
        placeholder = '1';
        for (i = 0; i < sizeof num && endianness == little; i++) {
            if (repr[sizeof num - 1 - i] != placeholder++) {
                endianness = unknown;
            }
        }

        switch (endianness) {
        case big:
            printf("Big endian; ");
            break;
        case little:
            printf("Little endian; ");
            break;
        default:
            printf("Unknown endianness; ");
            break;
        }

        /* The precision consumed by ".*" must be an int, not a size_t;
           repr is not NUL-terminated, so the precision bounds the read. */
        printf("byte order: %.*s.\n", (int)sizeof num, (const char *)repr);
    } else {
        puts("One-byte words: trivially both big-endian "
             "and little-endian.");
    }
    return 0;
}
 
B

Ben Bacarisse

Army1987 said:
Is there anything wrong with this
if (sizeof(int) > 1) {

There were (are?) machines whose oddities are only revealed in the
endianity of longs -- not least in the fact that it may differ from
that of ints!
 
A

Army1987

Ben Bacarisse said:
There were (are?) machines whose oddities are only revealed in the
endianity of longs -- not least in the fact that it may differ from
that of ints!

Maybe use size_t?
(Actually, I added the code I posted upthread to Wikipedia,
replacing:
#include <stdio.h>
#include <stdint.h>

/*
 * Report the byte order of int32_t by storing the characters "UNIX" in
 * order of decreasing significance and reading them back byte by byte.
 *
 * The hard-coded shifts assume 8-bit bytes, and int32_t requires C99's
 * <stdint.h>.  Reading un.c after writing un.i relies on union type
 * punning.
 */
int main(void)
{
    union {
        int32_t i;
        char c[sizeof(int32_t)];
    } un;

    /* Shift in int32_t so the expression stays defined even if int is
       narrower than 32 bits. */
    un.i = ((int32_t)'U' << 24) | ((int32_t)'N' << 16)
         | ((int32_t)'I' << 8) | ((int32_t)'X' << 0);

    if (un.c[0] == 'U' && un.c[1] == 'N' && un.c[2] == 'I' && un.c[3] == 'X')
        printf("big-endian: ");
    else if (un.c[0] == 'X' && un.c[1] == 'I' && un.c[2] == 'N' && un.c[3] == 'U')
        printf("little-endian: ");
    else
        printf("unknown: ");

    /* The precision consumed by ".*" must be an int, not a size_t;
       un.c is not NUL-terminated, so the precision bounds the read. */
    printf("%.*s\n", (int)sizeof(int32_t), un.c);
    return 0;
}
I'll keep the code posted by me, adding a comment saying that longs
can have different endianness, and also mentioning padding bits.
 
F

Flash Gordon

Army1987 wrote, On 01/07/07 16:39:
Maybe use size_t?

What makes you think size_t will show up oddities that only appear in long?
It could be unsigned int. How about this, which reports on char, short,
int, long and (where C99 is claimed) long long:

#include <stdio.h>
#include <limits.h>

/*
 * Classify and print the byte order of an object whose byte of
 * significance k (0 = least significant) holds the value k.
 *
 * type_name  name of the type, used only in the printed message
 * bytes      the object representation to inspect
 * size       number of bytes in the object
 */
static void report_byte_order(const char *type_name,
                              const unsigned char *bytes, size_t size)
{
    size_t i;
    int not_big = 0;
    int not_little = 0;

    for (i = 0; i < size; i++) {
        if (bytes[i] != i) {
            not_little = 1;     /* lowest address is not the low byte */
        }
        if (bytes[i] != size - i - 1) {
            not_big = 1;        /* lowest address is not the high byte */
        }
    }

    if (!not_big) {
        printf("We have big endian %s\n", type_name);
    } else if (!not_little) {
        printf("We have little endian %s\n", type_name);
    } else {
        printf("Strange endianness for %s\n", type_name);
    }
}

/*
 * Report the byte order of short, int, long and, when the compiler
 * claims C99 or later, long long.  Each type is tested separately
 * because their byte orders need not agree.
 */
int main(void)
{
    size_t i;

    puts("No endianness for char");

    if (sizeof(short) == 1) {
        puts("No endianness for short as it is only one byte");
    } else {
        unsigned short n = 0;
        /* Place the value i in the byte of significance i. */
        for (i = 0; i < sizeof(short); i++) {
            n += (unsigned short)i << (i * CHAR_BIT);
        }
        report_byte_order("short", (unsigned char *)&n, sizeof(short));
    }

    if (sizeof(int) == 1) {
        puts("No endianness for int as it is only one byte");
    } else {
        unsigned int n = 0;
        for (i = 0; i < sizeof(int); i++) {
            n += (unsigned int)i << (i * CHAR_BIT);
        }
        report_byte_order("int", (unsigned char *)&n, sizeof(int));
    }

    if (sizeof(long) == 1) {
        puts("No endianness for long as it is only one byte");
    } else {
        unsigned long n = 0;
        for (i = 0; i < sizeof(long); i++) {
            n += (unsigned long)i << (i * CHAR_BIT);
        }
        report_byte_order("long", (unsigned char *)&n, sizeof(long));
    }

    /* long long exists from C99 onwards, so accept any later standard
       too rather than exactly 199901L. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
    if (sizeof(long long) == 1) {
        puts("No endianness for long long as it is only one byte");
    } else {
        unsigned long long n = 0;
        for (i = 0; i < sizeof(long long); i++) {
            n += (unsigned long long)i << (i * CHAR_BIT);
        }
        report_byte_order("long long", (unsigned char *)&n,
                          sizeof(long long));
    }
#endif

    return 0;
}

(Actually, I added the code I posted upthread to Wikipedia,
replacing:
#include <stdio.h>
#include <stdint.h>

/*
 * Report the byte order of int32_t by storing the characters "UNIX" in
 * order of decreasing significance and reading them back byte by byte.
 *
 * The hard-coded shifts assume 8-bit bytes, and int32_t requires C99's
 * <stdint.h>.  Reading un.c after writing un.i relies on union type
 * punning.
 */
int main(void)
{
    union {
        int32_t i;
        char c[sizeof(int32_t)];
    } un;

    /* Shift in int32_t so the expression stays defined even if int is
       narrower than 32 bits. */
    un.i = ((int32_t)'U' << 24) | ((int32_t)'N' << 16)
         | ((int32_t)'I' << 8) | ((int32_t)'X' << 0);

    if (un.c[0] == 'U' && un.c[1] == 'N' && un.c[2] == 'I' && un.c[3] == 'X')
        printf("big-endian: ");
    else if (un.c[0] == 'X' && un.c[1] == 'I' && un.c[2] == 'N' && un.c[3] == 'U')
        printf("little-endian: ");
    else
        printf("unknown: ");

    /* The precision consumed by ".*" must be an int, not a size_t;
       un.c is not NUL-terminated, so the precision bounds the read. */
    printf("%.*s\n", (int)sizeof(int32_t), un.c);
    return 0;
}

Well, I've seen worse. What first strikes me is that it assumes 8-bit
bytes and C99 (or some C99-like extensions).
I'll keep the code posted by me, adding a comment saying that longs
can have different endianness, and also mentioning padding bits.

I'm sure I've come across systems where you had something like 2301 as
the byte ordering, but I may be misremembering and it was only a data
format. I've worked on some oddball embedded systems :)
 
O

Old Wolf

int main()
{
union {
int32_t i;
char c[sizeof(int32_t)];
} un;
un.i = ('U' << 24) | ('N' << 16) | ('I' << 8) | ('X' << 0);
if(un.c[0] == 'U' && un.c[1] == 'N' && un.c[2] == 'I' && un.c[3] == 'X')

This causes undefined behaviour in C90 (accessing
a member of a union that was not the last one set).
 
P

pete

Army1987 said:
Is there anything wrong with this (assuming sizeof(int) < 10,
or '9' + 1, '9' + 2, ..., '0' + sizeof(int) are reasonable
characters, and bits aren't "shuffled" all around)?

You can make as many assumptions as you want.
"Endianness" isn't a C concept.

If CHAR_BIT is 8 and INT_MAX is 0xffff,
then each byte having only its lowest significant bit set,
could be a valid representation of 3.
 
W

Walter Roberson

pete said:
If CHAR_BIT is 8 and INT_MAX is 0xffff,
then each byte having only its lowest significant bit set,
could be a valid representation of 3.

If CHAR_BIT was 8 and INT_MAX was 0xffff then sizeof(int) would have
to be at least 3, with the third byte needed to hold the sign
information. Having the lowest significant bit set in each of
3 bytes would be an odd representation indeed of 3, since
unary is not one of the representations allowed by C99.

Perhaps you meant "and INT_MAX is 0x7fff" ??
 
D

Dave Vandervies

If CHAR_BIT was 8 and INT_MAX was 0xffff then sizeof(int) would have
to be at least 3, with the third byte needed to hold the sign
information. Having the lowest significant bit set in each of
3 bytes would be an odd representation indeed of 3, since
unary is not one of the representations allowed by C99.

Perhaps you meant "and INT_MAX is 0x7fff" ??

"and UINT_MAX is 0xffff" seems more likely to me.


dave
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,744
Messages
2,569,484
Members
44,906
Latest member
SkinfixSkintag

Latest Threads

Top