Can this conversion code be simplified?

E

Eric Lilja

Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1


But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E
 
E

Eric Sosman

Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. [...]

Consider using sscanf():

unsigned int b0, b1, b2, b3;
if (sscanf(string, "\\x%x\\x%x\\x%x\\x%x",
&b0, &b1, &b2, &b3) == 4) ...

I say "consider" rather than "use" because sscanf() is
fairly tolerant in what it will accept as a number, and
it does not insist on converting the entire input string.
For example, the above format would accept all of

"\\x1\\x23\\x456\\x789A"
"\\x a\\x 11\\x 8c\\x -1"
"\\x0A\\x11\\x8C\\x01,Drink Billy Beer"

If you are concerned about detecting and rejecting such
departures from the expected format, sscanf() may not be
the right tool. If not, though, its ease of use may make
it the weapon of choice.
 
J

Joe Wright

Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1


But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E

I don't know yet. When I compile the code you posted I get..

s2ip.c:5: parse error before "u_short"
s2ip.c:19: parse error before "u_short"
s2ip.c: In function `extract_substring':
s2ip.c:21: `u_short' undeclared (first use in this function)
s2ip.c:21: (Each undeclared identifier is reported only once
s2ip.c:21: for each function it appears in.)
s2ip.c:21: parse error before "out_index"
s2ip.c:22: `start' undeclared (first use in this function)
s2ip.c:22: `end' undeclared (first use in this function)
s2ip.c:23: `out' undeclared (first use in this function)
s2ip.c:23: `out_index' undeclared (first use in this function)
s2ip.c:23: `s' undeclared (first use in this function)
s2ip.c: In function `to_ip':
s2ip.c:32: `u_short' undeclared (first use in this function)
s2ip.c:32: parse error before "start_index"
s2ip.c:35: `i' undeclared (first use in this function)
s2ip.c:36: `start_index' undeclared (first use in this function)
s2ip.c:36: `end_index' undeclared (first use in this function)

Which puts me off a bit.
 
E

Eric Lilja

Joe said:
Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1


But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E

I don't know yet. When I compile the code you posted I get..

s2ip.c:5: parse error before "u_short"
s2ip.c:19: parse error before "u_short"
s2ip.c: In function `extract_substring':
s2ip.c:21: `u_short' undeclared (first use in this function)
s2ip.c:21: (Each undeclared identifier is reported only once
s2ip.c:21: for each function it appears in.)
s2ip.c:21: parse error before "out_index"
s2ip.c:22: `start' undeclared (first use in this function)
s2ip.c:22: `end' undeclared (first use in this function)
s2ip.c:23: `out' undeclared (first use in this function)
s2ip.c:23: `out_index' undeclared (first use in this function)
s2ip.c:23: `s' undeclared (first use in this function)
s2ip.c: In function `to_ip':
s2ip.c:32: `u_short' undeclared (first use in this function)
s2ip.c:32: parse error before "start_index"
s2ip.c:35: `i' undeclared (first use in this function)
s2ip.c:36: `start_index' undeclared (first use in this function)
s2ip.c:36: `end_index' undeclared (first use in this function)

Which puts me off a bit.


Hmm, I'm compiling using gcc with the flags -ansi -pedantic and I
thought that would turn off any compiler extension. Anyway, u_short is
simply a typedef for unsigned short int.

/ E
 
E

Eric Lilja

Eric said:
Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. [...]

Consider using sscanf():

unsigned int b0, b1, b2, b3;
if (sscanf(string, "\\x%x\\x%x\\x%x\\x%x",
&b0, &b1, &b2, &b3) == 4) ...

I say "consider" rather than "use" because sscanf() is
fairly tolerant in what it will accept as a number, and
it does not insist on converting the entire input string.
For example, the above format would accept all of

"\\x1\\x23\\x456\\x789A"
"\\x a\\x 11\\x 8c\\x -1"
"\\x0A\\x11\\x8C\\x01,Drink Billy Beer"

If you are concerned about detecting and rejecting such
departures from the expected format, sscanf() may not be
the right tool. If not, though, its ease of use may make
it the weapon of choice.

Thanks for the reply. sscanf() would sure make for a much shorter
program, but maybe a bit too fragile. Given my test program, do you see
any immediate improvements I could do it, while keeping the same
approach so to speak?

/ E
 
E

Eric Sosman

Eric said:
Eric said:
Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. [...]

Consider using sscanf(): [...]

Thanks for the reply. sscanf() would sure make for a much shorter
program, but maybe a bit too fragile. Given my test program, do you see
any immediate improvements I could do it, while keeping the same
approach so to speak?

Please don't take the criticism personally, but your
code as it stands is already rather fragile, certainly no
paragon of robustness. It plucks and converts the target
substrings without checking what's between them, without
checking whether they're of the expected length, and without
even checking whether they're valid hexadecimal numbers!
Consider what your code would do with the strings

"\\x0A$y-4999999999999999999"
"abcdefghijklmnopqrstuvwxyz"
""

Since your code had nothing to detect or reject such departures
from the expected format, it seemed to me you might be confident
that the input would always be valid and that sscanf()'s "format
enforcement," limited as it is, would be more error-checking
than you started with.

If you want to extract the values "by hand" -- which gives
you more control of the format, if you choose to exercise it --
I'd suggest you check for the presence of each '\\' and 'x'
and check that the next two characters are in fact hexadecimal
digits (use isxdigit() from <ctype.h>). Most of all, don't
just assume that the string has the expected length, lest you
walk blindly off the end and into No Man's Land.
 
C

CBFalconer

.... snip code and error listings ...
Hmm, I'm compiling using gcc with the flags -ansi -pedantic and
I thought that would turn off any compiler extension. Anyway,
u_short is simply a typedef for unsigned short int.

No it isn't. If it were there would be aline such as:

typedef unsigned short int u_short;

somewhere in your code. There isn't.

--
"If you want to post a followup via groups.google.com, don't use
the broken "Reply" link at the bottom of the article. Click on
"show options" at the top of the article, then click on the
"Reply" at the bottom of the article headers." - Keith Thompson
More details at: <http://cfaj.freeshell.org/google/>
Also see <http://www.safalra.com/special/googlegroupsreply/>
 
R

ranjmis

Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);
you can try this looping as well instead of to_ip function call:
----
const char *pc=ip_as_string;
register i=2;

while(i<16){

ip[i/4] = (*(pc+i)-(*(pc+i) < 'A' ?'0':'A'-10)) * 16
+ (*(pc+i+1) - (*(pc+i+1) < 'A'
?'0':'A'-10));
i+=4;
}
----
printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

for(; start <= end; ++start)
out[out_index++] = s[start];

out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

start_index += 4;
end_index += 4;

ip = strtol(sub, NULL, 16);
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1


But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

/ E
 
M

Michael Mair

Eric said:
Hello, I have IPv4-numbers in the following format:
"\\x0A\\x11\\x8C\\x01"

Now I need each byte as an int. I wrote the following test program:
#include <stdio.h>
#include <stdlib.h>

static void to_ip(const char *, int *);
static void extract_substring(const char *, u_short, u_short, char *);

Apart from the missing typedef for u_short, prototypes without
parameter names make it hard to see which roles the parameters
have. In addition, some compilers warn if there are mismatches
in the parameter names of different prototypes (including the
function definition) -- this can be very useful if the meaning
of a parameter changes but not all uses have been adapted.
int
main(void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";

int ip[4];

to_ip(ip_as_string, ip);

I'd rather have a consistent naming scheme:
extract_substring() <-> convert_string_to_ip()

Just stopping here and having a look:
- There is no way to tell whether to_ip() succeeded
- to_ip() has no way to tell whether the storage ip points to
is large enough
- I cannot tell here whether ip[x] will be be guaranteedly
initialised, so the following printf() may invoke undefined
behaviour.
printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static void
extract_substring(const char *s, u_short start, u_short end, char *out)
{
u_short out_index = 0;

If you are using unsigned integer indices, then use the "natural"
type: size_t.
for(; start <= end; ++start)
out[out_index++] = s[start];

You do not respect string terminators here.
In fact, strncpy() may be for once a much better choice then this
loop.
out[++start] = '\0';
}

static void
to_ip(const char *ip_as_string, int *ip)
{
char sub[5] = "0";
char *ptr = sub;
u_short start_index = 1;
u_short end_index = 3;
u_short i = 0;

for(; i < 4; ++i)
{
extract_substring(ip_as_string, start_index, end_index, ptr + 1);

By restricting yourself to this substring by ignoring the '\\',
you effectively do not gain anything. Then you can omit all
checking and use start_index = 2 (6, 10, 14) instead.
start_index += 4;
end_index += 4;

ip = strtol(sub, NULL, 16);


You do not use the error checking capabilities of strtol() --
then you can as well use sscanf() and get rid of the dangerous
extract_substring() and the loop.
}
}

It compiles cleanly and produces the following output:
$ ./convert_to_ip.exe
IP is 10.17.140.1


But since I am very poor on string manipulations in C, I'm guessing
there are alot cleaner ways to do this. Please help me improve the
code. It's a homework problem, but as you can see I attempted to solve
it myself first.

Thank you for stating that :)
Eric Sosman already gave you good advice in
<[email protected]>, so I just will give you
a version that will work but make the teacher suspicious, i.e.
there still remains something to do ;-)

,----
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
E_ConvNoConversion = -1,
E_ConvHexOk = 0,
E_ConvRangeErr,
E_ConvFormatErr
};


static int convertToIP (const char *pIPAsString, int *pIP);
static int convertHexStringToHexValues (const char *pHexString,
int *pHexValArray,
size_t number);
static int isHexValOctett (unsigned long HexVal);
static int retrieveHexVal(const char *pHexString,
size_t *pIndex,
unsigned long *pHexVal);

int
main (void)
{
const char *ip_as_string="\\x0A\\x11\\x8C\\x01";
int ip[4] = {0};
int ret;

ret = convertToIP(ip_as_string, ip);

if (ret != E_ConvHexOk) {
fprintf(stderr, "Conversion not successful (%d): %s\n",
ret, ip_as_string);
exit(EXIT_FAILURE);
}

printf("IP is %i.%i.%i.%i\n", ip[0], ip[1], ip[2], ip[3]);

return 0;
}

static int
convertToIP (const char *pIPAsString, int *pIP)
{
return convertHexStringToHexValues(pIPAsString, pIP, 4);
}

static int
convertHexStringToHexValues (const char *pHexString,
int *pHexValArray,
size_t number)
{
size_t i;
size_t index = 0;
int ret = E_ConvNoConversion;
unsigned long HexVal;

for (i = 0; i < number; i++) {
if (E_ConvHexOk != (ret = retrieveHexVal(pHexString,
&index,
&HexVal))
)
break;
if (!isHexValOctett(HexVal)) {
ret = E_ConvRangeErr;
break;
}
pHexValArray = HexVal;
}

return ret;
}

static int isHexValOctett (unsigned long HexVal)
{
return HexVal < 256U;
}

static int retrieveHexVal(const char *pHexString,
size_t *pIndex,
unsigned long *pHexVal)
{
static const char startSequence[] = "\\x";
static const char allowedTermCharacters[] = {'\\', '\0'};
static const size_t startLen = sizeof startSequence - 1;
const char * substring = pHexString + *pIndex;
char *pEnd = NULL;
size_t index = 0;

if (0 != strncmp(substring, startSequence, startLen))
return E_ConvFormatErr;

index += startLen;
substring += index;

*pHexVal = strtoul(substring, &pEnd, 16);
if (pEnd == substring
|| !strchr(allowedTermCharacters, *pEnd)
)
return E_ConvFormatErr;

/*
if ((pEnd - substring) != 2)
return E_ConvFormatErr;
*/

index += pEnd - substring;

*pIndex += index;

return E_ConvHexOk;
}
`----

Note the difference in behaviour if you change the check
in retrieveHexVal() to the version in the comment; have
a look at "\\x0A\\x11\\x8C\\x01\\" and
"\\x0AB\\x11\\x8C\\x01", respectively.


Cheers
Michael
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,755
Messages
2,569,535
Members
45,007
Latest member
obedient dusk

Latest Threads

Top