J
John Salmon
I'm working with two libraries. One, written
in old-school C, returns a very large
chunk of data in the form of a C-style,
NUL-terminated string.
The other, written in more modern C++,
is a parser for the chunk of bytes returned by
the first. It expects a reference to a
std::istream as its argument.
The chunk of data is very large.
I'd like to feed the output of the first to
the second WITHOUT MAKING AN EXTRA IN-MEMORY COPY.
My attempts to create an istringstream from the
chunk of data all seem to at least double the
amount of VM used. Here's a short program demonstrating
what I've tried. Is there any way to get "inside"
the istringstream and tell it to use the 'chunk'
directly, rather than insisting on making a copy?
Thanks,
John Salmon
[jsalmon@river c++]$ cat chararraytostream.cpp
#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <string>
using namespace std;
char *getLotsOfBytes();
istream& streamParser(istream &s);
void linuxChkMem(const char *msg);
// Demonstration 1: pass the raw C string straight to the istringstream
// constructor, so the std::string it needs is created implicitly, and
// report the VmData figure after every step.
void withImplicitString(){
    linuxChkMem("Before getLotsOfBytes: ");
    char *const bytes = getLotsOfBytes();
    linuxChkMem("After getLotsOfBytes():");

    {
        // The const char* is converted to a temporary std::string here.
        std::istringstream input(bytes);
        linuxChkMem("After iss(p): ");

        streamParser(input);
        linuxChkMem("After streamParser(iss): ");
    }   // the stream is destroyed at this brace

    linuxChkMem("After iss goes out of scope: ");

    // The buffer came from malloc, so it is released with free.
    free(bytes);
    linuxChkMem("After free(p): ");
}
// Demonstration 2: copy the chunk into a named std::string first, release
// the malloc'ed original right away, then build the istringstream from the
// string. The VmData figure is reported after every step.
void withExplicitString(){
    linuxChkMem("Before getLotsOfBytes: ");
    char *const bytes = getLotsOfBytes();
    linuxChkMem("After getLotsOfBytes():");

    {
        std::string text(bytes);
        linuxChkMem("After s(chunk): ");

        // The string holds its own copy, so the C buffer can go now.
        free(bytes);
        linuxChkMem("After free(p): ");

        std::istringstream input(text);
        linuxChkMem("After iss(s): ");

        streamParser(input);
        linuxChkMem("After streamParser(iss): ");
    }   // the stream and the string are destroyed at this brace

    linuxChkMem("After iss goes out of scope: ");
}
// Entry point: runs both demonstrations in order, printing a heading
// before each one.
int main(int argc, char **argv){
    (void)argc;   // command-line arguments are not used
    (void)argv;

    fputs("with an implicit string constructor\n", stdout);
    withImplicitString();

    fputs("\nwith an explicit string constructor\n", stdout);
    withExplicitString();

    return 0;
}
// On linux, tell us how much data space we're using
// in the VM.
//
// Prints `msg` (no newline is added) and flushes stdout so the label is
// written before the child process produces output, then runs
// `grep VmData /proc/<pid>/status` through system() so that grep's line
// follows the label.
//
// getpid() is POSIX and is declared in <unistd.h>.
void linuxChkMem(const char *msg){
    printf("%s", msg);
    fflush(stdout);

    char cmd[64];
    // snprintf never writes past the end of cmd. A negative result, or one
    // that does not fit, means the command was not formatted completely.
    const int len = snprintf(cmd, sizeof cmd, "grep VmData /proc/%d/status",
                             static_cast<int>(getpid()));
    if (len < 0 || static_cast<std::size_t>(len) >= sizeof cmd) {
        fprintf(stderr, "\nlinuxChkMem: could not build the grep command\n");
        return;
    }

    if (system(cmd) != 0) {
        // grep matched nothing or could not be run; end the label line and
        // say so instead of failing silently.
        fprintf(stderr, "\nlinuxChkMem: '%s' failed\n", cmd);
    }
}
// Size in bytes of the buffer handed out by getLotsOfBytes (100 MiB).
static const int SZ = 100*1024*1024;
// A rough approximation to getLotsOfBytes. In the
// real application, getLotsOfBytes has these characteristics:
// - it returns a malloced pointer to a NUL-terminated array of chars.
// - it is out of my control. E.g., I can't rewrite it in a way
// that might be more friendly to C++ streams.
//
// This stand-in allocates SZ bytes, fills them with blanks, and writes the
// NUL-terminated text "3.1415 2.718 1.414" starting 50 bytes before the end.
// The caller owns the returned buffer and must release it with free().
// If the allocation fails, the error is reported and the program aborts
// rather than writing through a null pointer.
char *getLotsOfBytes(){
    char *p = static_cast<char *>(std::malloc(SZ));
    if (!p) {
        std::perror("getLotsOfBytes: malloc");
        std::abort();
    }
    std::memset(p, ' ', SZ);
    std::strcpy(p+SZ-50, "3.1415 2.718 1.414");
    return p;
}
// A rough approximation to streamParser. In the real
// application, streamParser takes a ref to an istream
// and does what it does. Again, I can't easily redefine
// the interface.
//
// This stand-in extracts three whitespace-separated doubles from `s`,
// prints them with printf, and returns `s` so the caller can inspect the
// stream state afterwards.
std::istream& streamParser(std::istream& s){
    // Start from zero: once one extraction fails, the remaining values are
    // never written, and printing them uninitialized would be undefined
    // behavior.
    double x = 0.0;
    double y = 0.0;
    double z = 0.0;
    s >> x >> y >> z;
    std::printf("x: %f y: %f z: %f\n", x, y, z);
    return s;
}
[jsalmon@river c++]$ g++ -O3 chararraytostream.cpp
[jsalmon@river c++]$ a.out
with an implicit string constructor
Before getLotsOfBytes: VmData: 40 kB
After getLotsOfBytes():VmData: 102444 kB
After iss(p): VmData: 204848 kB
x: 3.141500 y: 2.718000 z: 1.414000
After streamParser(iss): VmData: 204980 kB
After iss goes out of scope: VmData: 102576 kB
After free(p): VmData: 172 kB
with an explicit string constructor
Before getLotsOfBytes: VmData: 172 kB
After getLotsOfBytes():VmData: 102576 kB
After s(chunk): VmData: 204980 kB
After free(p): VmData: 102576 kB
After iss(s): VmData: 204980 kB
x: 3.141500 y: 2.718000 z: 1.414000
After streamParser(iss): VmData: 204980 kB
After iss goes out of scope: VmData: 172 kB
[jsalmon@river c++]$
in old school C, that returns a very large
chunk of data in the form of a C-style,
NUL-terminated string.
The other, written in more modern C++,
is a parser for the chunk of bytes returned by
the first. It expects a reference to a
std::istream as its argument.
The chunk of data is very large.
I'd like to feed the output of the first to
the second WITHOUT MAKING AN EXTRA IN-MEMORY COPY.
My attempts to create an istringstream from the
chunk of data all seem to at least double the
amount of VM used. Here's a short program demonstrating
what I've tried. Is there any way to get "inside"
the istringstream and tell it to use the 'chunk'
directly, rather than insisting on making a copy?
Thanks,
John Salmon
[jsalmon@river c++]$ cat chararraytostream.cpp
#include <string>
#include <sstream>
#include <cstdlib>
#include <cstring>
#include <cstdio>
using namespace std;
char *getLotsOfBytes();
istream& streamParser(istream &s);
void linuxChkMem(const char *msg);
// Demonstration 1: pass the raw C string straight to the istringstream
// constructor, so the std::string it needs is created implicitly, and
// report the VmData figure after every step.
void withImplicitString(){
    linuxChkMem("Before getLotsOfBytes: ");
    char *const bytes = getLotsOfBytes();
    linuxChkMem("After getLotsOfBytes():");

    {
        // The const char* is converted to a temporary std::string here.
        std::istringstream input(bytes);
        linuxChkMem("After iss(p): ");

        streamParser(input);
        linuxChkMem("After streamParser(iss): ");
    }   // the stream is destroyed at this brace

    linuxChkMem("After iss goes out of scope: ");

    // The buffer came from malloc, so it is released with free.
    free(bytes);
    linuxChkMem("After free(p): ");
}
// Demonstration 2: copy the chunk into a named std::string first, release
// the malloc'ed original right away, then build the istringstream from the
// string. The VmData figure is reported after every step.
void withExplicitString(){
    linuxChkMem("Before getLotsOfBytes: ");
    char *const bytes = getLotsOfBytes();
    linuxChkMem("After getLotsOfBytes():");

    {
        std::string text(bytes);
        linuxChkMem("After s(chunk): ");

        // The string holds its own copy, so the C buffer can go now.
        free(bytes);
        linuxChkMem("After free(p): ");

        std::istringstream input(text);
        linuxChkMem("After iss(s): ");

        streamParser(input);
        linuxChkMem("After streamParser(iss): ");
    }   // the stream and the string are destroyed at this brace

    linuxChkMem("After iss goes out of scope: ");
}
// Entry point: runs both demonstrations in order, printing a heading
// before each one.
int main(int argc, char **argv){
    (void)argc;   // command-line arguments are not used
    (void)argv;

    fputs("with an implicit string constructor\n", stdout);
    withImplicitString();

    fputs("\nwith an explicit string constructor\n", stdout);
    withExplicitString();

    return 0;
}
// On linux, tell us how much data space we're using
// in the VM.
//
// Prints `msg` (no newline is added) and flushes stdout so the label is
// written before the child process produces output, then runs
// `grep VmData /proc/<pid>/status` through system() so that grep's line
// follows the label.
//
// getpid() is POSIX and is declared in <unistd.h>.
void linuxChkMem(const char *msg){
    printf("%s", msg);
    fflush(stdout);

    char cmd[64];
    // snprintf never writes past the end of cmd. A negative result, or one
    // that does not fit, means the command was not formatted completely.
    const int len = snprintf(cmd, sizeof cmd, "grep VmData /proc/%d/status",
                             static_cast<int>(getpid()));
    if (len < 0 || static_cast<std::size_t>(len) >= sizeof cmd) {
        fprintf(stderr, "\nlinuxChkMem: could not build the grep command\n");
        return;
    }

    if (system(cmd) != 0) {
        // grep matched nothing or could not be run; end the label line and
        // say so instead of failing silently.
        fprintf(stderr, "\nlinuxChkMem: '%s' failed\n", cmd);
    }
}
// Size in bytes of the buffer handed out by getLotsOfBytes (100 MiB).
static const int SZ = 100*1024*1024;
// A rough approximation to getLotsOfBytes. In the
// real application, getLotsOfBytes has these characteristics:
// - it returns a malloced pointer to a NUL-terminated array of chars.
// - it is out of my control. E.g., I can't rewrite it in a way
// that might be more friendly to C++ streams.
//
// This stand-in allocates SZ bytes, fills them with blanks, and writes the
// NUL-terminated text "3.1415 2.718 1.414" starting 50 bytes before the end.
// The caller owns the returned buffer and must release it with free().
// If the allocation fails, the error is reported and the program aborts
// rather than writing through a null pointer.
char *getLotsOfBytes(){
    char *p = static_cast<char *>(std::malloc(SZ));
    if (!p) {
        std::perror("getLotsOfBytes: malloc");
        std::abort();
    }
    std::memset(p, ' ', SZ);
    std::strcpy(p+SZ-50, "3.1415 2.718 1.414");
    return p;
}
// A rough approximation to streamParser. In the real
// application, streamParser takes a ref to an istream
// and does what it does. Again, I can't easily redefine
// the interface.
//
// This stand-in extracts three whitespace-separated doubles from `s`,
// prints them with printf, and returns `s` so the caller can inspect the
// stream state afterwards.
std::istream& streamParser(std::istream& s){
    // Start from zero: once one extraction fails, the remaining values are
    // never written, and printing them uninitialized would be undefined
    // behavior.
    double x = 0.0;
    double y = 0.0;
    double z = 0.0;
    s >> x >> y >> z;
    std::printf("x: %f y: %f z: %f\n", x, y, z);
    return s;
}
[jsalmon@river c++]$ g++ -O3 chararraytostream.cpp
[jsalmon@river c++]$ a.out
with an implicit string constructor
Before getLotsOfBytes: VmData: 40 kB
After getLotsOfBytes():VmData: 102444 kB
After iss(p): VmData: 204848 kB
x: 3.141500 y: 2.718000 z: 1.414000
After streamParser(iss): VmData: 204980 kB
After iss goes out of scope: VmData: 102576 kB
After free(p): VmData: 172 kB
with an explicit string constructor
Before getLotsOfBytes: VmData: 172 kB
After getLotsOfBytes():VmData: 102576 kB
After s(chunk): VmData: 204980 kB
After free(p): VmData: 102576 kB
After iss(s): VmData: 204980 kB
x: 3.141500 y: 2.718000 z: 1.414000
After streamParser(iss): VmData: 204980 kB
After iss goes out of scope: VmData: 172 kB
[jsalmon@river c++]$