OT: For all the pre-emptive optimization some people do...

C

clayne

C is still blazingly fast for what one might think to be too
"iterative."

On one of our average dual xeon boxes (don't remember the exact clock
but it's nothing insane):

For a simple char cmp/set through an array of chars of various sizes
and iterations, say 8192 chars x 1000000 iterations, using reasonably
accurate timing, I log about 107 million chars operated on per second.
Now, granted I'm not doing much here but checking the value of a char
and setting it (I chose an arbitrary value that would guarantee a
false) but this is not too far off from what a typical parser/string
library effectively does.

I've even included a multi-threaded version of it just for fun.

$ uname -ripsv
SunOS 5.8 Generic_117351-24 i386 i86pc

$ cat digtrench.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>

/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesized so that any expression naming an array expands safely.
 */
#define NELEM(x) (sizeof(x) / sizeof((x)[0]))

/*
 * One measurement job: the inputs handed to measure() and the slot
 * where its result is stored.
 */
struct m_s {
    float res;      /* elapsed time returned by measure() */
    long max;       /* number of passes over the buffer */
    long ne;        /* number of chars in the buffer */
    char *buf;      /* buffer that is scanned */
};

/*
 * Benchmark parameters. Each MAXCn is the pass count that main() pairs
 * with bufn, chosen so every single-threaded run touches the same total:
 * 1000000 * 8192 == 125000 * 65536 == 8000 * 1024000 == 8192000000 chars.
 * THREAD_MIN/THREAD_MAX bound the thread counts tried (doubling each step).
 */
enum {
    MAXC0 = 1000000,
    MAXC1 = 125000,
    MAXC2 = 8000,
    THREAD_MIN = 2,
    THREAD_MAX = 32
};

/*
 * Work buffers: three single-threaded sizes plus one private 8192-char
 * buffer per thread. The per-thread array is sized by THREAD_MAX so it
 * cannot drift out of step with the thread limit.
 */
static char buf0[8192],
            buf1[65536],
            buf2[1024000],
            buf_t0[THREAD_MAX][8192];

/* Thread entry point; st points to the struct m_s describing the job. */
void *measure_t(void *st);
/* Times max passes over the ne chars in buf; returns the elapsed time. */
float measure(long int max, long int ne, char *buf);
/* Prints one result block; m is NULL for a single-threaded run. */
void results(long int max, long int ne, float diff, struct m_s *m,
    long int mne);

/*
 * pthread start routine: runs measure() with the parameters stored in
 * the struct m_s that st points to and records the elapsed time in its
 * res field. Always returns NULL.
 */
void *
measure_t(void *st)
{
    struct m_s *job = st;

    job->res = measure(job->max, job->ne, job->buf);
    return (NULL);
}

/*
 * Time max passes over the first ne chars of buf.
 *
 * Each pass issues an fread() from /dev/null into buf, walks the buffer
 * comparing every char with 'Z' and storing 'Z' where it differs, then
 * rewinds the stream.
 *
 * max  number of passes
 * ne   number of chars in buf to visit on each pass
 * buf  buffer of at least ne chars; it is overwritten with 'Z'
 *
 * Returns the elapsed CLOCK_HIGHRES time in seconds, or a negative
 * value if /dev/null could not be opened.
 */
float
measure(long int max, long int ne, char *buf)
{
    struct timespec st, fi;
    double diff;
    long int i, t;
    FILE *fp;
    char *c;

    fp = fopen("/dev/null", "r");
    if (fp == NULL) {
        /* Without a stream the fread() below would dereference NULL. */
        perror("fopen /dev/null");
        return (-1.0f);
    }

    clock_gettime(CLOCK_HIGHRES, &st); /* start */

    for (i = 0; i < max; i++) {
        /* Result ignored on purpose: the call is only per-pass overhead. */
        (void)fread(buf, 1, (size_t)ne, fp);

        /*
         * Advance on every iteration. Advancing only after a store
         * would pin the pointer to buf[0] once the buffer holds 'Z'.
         */
        for (c = buf, t = 0; t < ne; t++, c++) {
            if (*c != 'Z')
                *c = 'Z';
        }

        (void)fseek(fp, 0, SEEK_SET);
    }

    fclose(fp);

    clock_gettime(CLOCK_HIGHRES, &fi); /* end */

    /*
     * Subtract the integer fields before converting: a float cannot
     * hold an absolute timestamp with sub-second precision.
     */
    diff = (double)(fi.tv_sec - st.tv_sec) +
        (double)(fi.tv_nsec - st.tv_nsec) / 1e9;

    return ((float)diff);
}

void
results(long int max, long int ne, float diff, struct m_s *m, long int
mne)
{
long int i;

if (m == NULL) {
fprintf(stdout, "bufsize = %ld, cycles = %ld, time = %.4f,
char/sec = %.4f\n\n",
ne, max, diff, (float)ne * (float)max / diff);
} else {
for (i = 0; i < mne; i++) {
fprintf(stdout, "thread %2d: time = %.4f, char/sec:
%.4f\n",
i, m->res, (float)ne * (float)max / m->res);
m++;
}

fprintf(stdout, "bufsize = %ld, cycles = %ld, time = %.4f,
total char/sec = %.4f\n\n",
ne, max, diff, (float)ne * (float)max * (float)mne / diff);
}

return;
}

/*
 * Benchmark driver.
 *
 * Prints the CLOCK_HIGHRES resolution, runs the three single-threaded
 * measurements (buf0/buf1/buf2 with MAXC0/MAXC1/MAXC2), then repeats
 * the buf0-sized measurement with THREAD_MIN, 2*THREAD_MIN, ... up to
 * THREAD_MAX concurrent threads, each working on its own row of buf_t0.
 *
 * Returns 0 on success, EXIT_FAILURE if a thread cannot be created.
 */
int
main(void)
{
    struct timespec st, fi;
    struct m_s m[THREAD_MAX];
    pthread_t t[THREAD_MAX];
    int r, tm, err;
    float diff;

    clock_getres(CLOCK_HIGHRES, &st);
    fprintf(stdout, "CLOCK_HIGHRES res = %ld sec, %ld nsec\n\n",
        (long)st.tv_sec, st.tv_nsec);

    results(MAXC0, NELEM(buf0),
        measure(MAXC0, NELEM(buf0), buf0), NULL, 0);
    results(MAXC1, NELEM(buf1),
        measure(MAXC1, NELEM(buf1), buf1), NULL, 0);
    results(MAXC2, NELEM(buf2),
        measure(MAXC2, NELEM(buf2), buf2), NULL, 0);

    /* initialize thread specific structs */
    for (r = 0; r < THREAD_MAX; r++) {
        m[r].max = MAXC0;
        m[r].ne = NELEM(buf_t0[r]);
        m[r].buf = buf_t0[r];
        m[r].res = 0;
    }

    for (tm = THREAD_MIN; tm <= THREAD_MAX; tm *= 2) { /* 2, 4, ... */
        clock_gettime(CLOCK_HIGHRES, &st); /* start */

        /* Start all tm workers first so that they run concurrently. */
        for (r = 0; r < tm; r++) {
            err = pthread_create(&t[r], NULL, measure_t, &m[r]);
            if (err != 0) {
                /* Joining a thread that never started is undefined. */
                fprintf(stderr,
                    "pthread_create failed (error %d)\n", err);
                return (EXIT_FAILURE);
            }
        }

        /* Join each worker by its own id, not by the array address. */
        for (r = 0; r < tm; r++)
            pthread_join(t[r], NULL);

        clock_gettime(CLOCK_HIGHRES, &fi); /* finish */

        /*
         * Subtract the integer fields before converting: a float cannot
         * hold an absolute timestamp with sub-second precision.
         */
        diff = (float)((double)(fi.tv_sec - st.tv_sec) +
            (double)(fi.tv_nsec - st.tv_nsec) / 1e9);

        results(MAXC0, NELEM(buf_t0[0]), diff, m, tm);
    }

    return (0);
}

$ gcc -pedantic -g3 -O0 -o digtrench digtrench.c -lrt -lpthread
$ ./digtrench
CLOCK_HIGHRES res = 0 sec, 20 nsec

bufsize = 8192, cycles = 1000000, time = 75.9570, char/sec =
107850547.4832

bufsize = 65536, cycles = 125000, time = 71.5670, char/sec =
114466125.8174

bufsize = 1024000, cycles = 8000, time = 71.3342, char/sec =
114839680.3395

thread 0: time = 78.4226, char/sec: 104459694.2545
thread 1: time = 78.4176, char/sec: 104466320.5801
bufsize = 8192, cycles = 1000000, time = 78.4240, total char/sec =
208915587.8287

thread 0: time = 147.5268, char/sec: 55528901.5718
thread 1: time = 153.9118, char/sec: 53225303.1474
thread 2: time = 158.1773, char/sec: 51789987.6976
thread 3: time = 151.0550, char/sec: 54231915.8732
bufsize = 8192, cycles = 1000000, time = 158.2141, total char/sec =
207111800.6210

thread 0: time = 307.0789, char/sec: 26677184.0128
thread 1: time = 312.4140, char/sec: 26221616.0658
thread 2: time = 311.0002, char/sec: 26340817.9196
thread 3: time = 307.4566, char/sec: 26644413.2487
thread 4: time = 308.5881, char/sec: 26546713.5227
thread 5: time = 307.1540, char/sec: 26670658.3911
thread 6: time = 309.9937, char/sec: 26426347.5657
thread 7: time = 308.5199, char/sec: 26552585.0285
bufsize = 8192, cycles = 1000000, time = 312.4464, total char/sec =
209751148.5611

thread 0: time = 616.9636, char/sec: 13277930.3252
thread 1: time = 622.2217, char/sec: 13165725.7653
thread 2: time = 609.1315, char/sec: 13448656.6647
thread 3: time = 611.7472, char/sec: 13391152.5905
thread 4: time = 609.0115, char/sec: 13451306.4885
thread 5: time = 616.8430, char/sec: 13280527.7471
thread 6: time = 620.6915, char/sec: 13198183.6899
thread 7: time = 611.7462, char/sec: 13391175.3035
thread 8: time = 612.1790, char/sec: 13381706.6322
thread 9: time = 607.7217, char/sec: 13479854.7984
thread 10: time = 606.8297, char/sec: 13499668.5877
thread 11: time = 622.2080, char/sec: 13166015.0579
thread 12: time = 619.4121, char/sec: 13225443.7329
thread 13: time = 622.5504, char/sec: 13158773.6545
thread 14: time = 621.9585, char/sec: 13171296.8815
thread 15: time = 614.7153, char/sec: 13326495.0238
bufsize = 8192, cycles = 1000000, time = 622.8664, total char/sec =
210433571.7155

thread 0: time = 1189.8522, char/sec: 6884888.8853
thread 1: time = 1225.6113, char/sec: 6684011.3273
thread 2: time = 1208.4497, char/sec: 6778933.3328
thread 3: time = 1181.7941, char/sec: 6931833.7484
thread 4: time = 1236.4375, char/sec: 6625486.5288
thread 5: time = 1214.5646, char/sec: 6744803.9959
thread 6: time = 1232.6667, char/sec: 6645754.0231
thread 7: time = 1223.3357, char/sec: 6696444.8470
thread 8: time = 1230.8901, char/sec: 6655346.2049
thread 9: time = 1228.2274, char/sec: 6669774.5765
thread 10: time = 1191.1541, char/sec: 6877363.9994
thread 11: time = 1203.1627, char/sec: 6808721.6016
thread 12: time = 1207.9166, char/sec: 6781925.0301
thread 13: time = 1236.2368, char/sec: 6626562.0723
thread 14: time = 1231.0618, char/sec: 6654418.3369
thread 15: time = 1225.4984, char/sec: 6684627.1791
thread 16: time = 1230.6062, char/sec: 6656881.7809
thread 17: time = 1222.1754, char/sec: 6702802.1503
thread 18: time = 1204.6393, char/sec: 6800375.9473
thread 19: time = 1234.6458, char/sec: 6635101.5966
thread 20: time = 1241.5927, char/sec: 6597977.1956
thread 21: time = 1234.5055, char/sec: 6635855.4461
thread 22: time = 1243.4310, char/sec: 6588222.2661
thread 23: time = 1216.5513, char/sec: 6733789.3644
thread 24: time = 1216.0751, char/sec: 6736426.2127
thread 25: time = 1232.8319, char/sec: 6644863.6988
thread 26: time = 1243.8838, char/sec: 6585824.2322
thread 27: time = 1244.1609, char/sec: 6584357.4369
thread 28: time = 1243.0504, char/sec: 6590239.5437
thread 29: time = 1218.6232, char/sec: 6722340.5961
thread 30: time = 1236.5499, char/sec: 6624884.1415
thread 31: time = 1244.2644, char/sec: 6583809.6563
bufsize = 8192, cycles = 1000000, time = 1244.9033, total char/sec =
210573781.6927

I'm pretty sure there are cleaner ways to do this, but for 8GB+ chars,
it's close enough.
 
C

clayne

Not at all.

I was simply saying that what may 'look' slow or repetitive (byte-by-byte
manipulation over regions of memory, for example) - which is
typically what starts people on pre-emptively optimizing while first
implementing - is actually fairly quick in the first place.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,754
Messages
2,569,527
Members
44,999
Latest member
MakersCBDGummiesReview

Latest Threads

Top