K
Ken Camann
Hey everyone.
First of all let me say that I know that the C++ standard never makes
any guarantees about compiler implementation and thus about the
performance of any language feature. That said, r-value references
are clearly being included because they make certain optimization
analysis cases possible to identify, and every compiler will try to
include them.
Anyway, I've been playing around with some code in ConceptGCC using r-
value references and lots of inlined code to write something very
general that should simplify down to the speed of the old C version.
Needless to say, the generated assembly code is about twice as long so
it doesn't work. I'm not sure if that's because the optimizations
aren't in yet, or because I am asking for an unreasonable amount of
intelligence from the compiler (the kind that I can never count on
showing up, even years from now). While this isn't really a C++
language issue per se, many of the advantages of template/generic
programming are predicated on the idea that the compiler can and will
make certain optimizations after it does the inline expansions.
Here is a simple C program which adds the first vector from a list in
interleaved format (x0, y0, z0, x1, y1, z1, ...) to the second vector
from a list in planar format (x0, x1, y0, y1, z0, z1). It adds
interleaved vector 0 to planar vector 1 and stores it in interleaved
vector 2:
#include <stdlib.h>  // rand()

// Adds interleaved vector 0 to planar vector 1 and stores the sum in
// interleaved vector 2, then returns a value derived from that sum.
int main (void)
{
    // Three vectors stored interleaved (x0, y0, z0, x1, y1, z1, ...).
    // Vectors 0 and 1 are random inputs; vector 2 receives the result.
    // rand() returns int; the explicit casts keep the snippet valid when
    // it is compiled as C++ as well, where braced int -> double
    // initialisation is a narrowing conversion.
    double interleavedStorage[] = {
        (double)rand(), (double)rand(), (double)rand(),
        (double)rand(), (double)rand(), (double)rand(),
        0, 0, 0
    };
    // Two vectors stored planar: one array per coordinate.
    double planarX[] = {(double)rand(), (double)rand()};
    double planarY[] = {(double)rand(), (double)rand()};
    double planarZ[] = {(double)rand(), (double)rand()};

    interleavedStorage[6] = interleavedStorage[0] + planarX[1];
    interleavedStorage[7] = interleavedStorage[1] + planarY[1];
    interleavedStorage[8] = interleavedStorage[2] + planarZ[1];

    // Read back the vector that was just written (elements 6..8), so the
    // three additions above feed an observable result; summing elements
    // 0..2 would only read the inputs. Each result element is the sum of
    // two rand() values, so the total can reach 6 * RAND_MAX; dividing by
    // 8 keeps it within int range, where the conversion is well defined.
    return (int)((interleavedStorage[6] + interleavedStorage[7] +
                  interleavedStorage[8]) / 8.0);
}
Here is my C++ program, which allows you to store a vector in any
format. I add some comments to explain what I think is supposed to
happen:
#include <utility>
#include <cstdlib>
// std::move is declared in <utility>, which this program already
// includes, so no hand-written replacement is needed here. Adding our own
// declaration to namespace std is undefined behaviour, and a second
// `move` template there clashes with the library's on every call.
using namespace std;
// raw_vec owns one (x, y, z) triple by value. The intent in this program
// is that a raw_vec only ever appears as a short-lived intermediate which
// the optimizer can remove entirely.
struct raw_vec
{
    double x;
    double y;
    double z;

    // Store the three coordinates as given.
    raw_vec (double _x, double _y, double _z)
    : x{_x}, y{_y}, z{_z}
    {
    }
};
// A ref_vec is a bundle of three references to the x, y and z
// coordinates of a vector whose storage is managed by some other class.
// It owns nothing. The hope is that, after inline expansion, the compiler
// sees that each reference names storage it already knew about and never
// materialises a ref_vec at all.
struct ref_vec
{
    // Bind to three existing coordinates; nothing is copied.
    ref_vec (double& _x, double& _y, double& _z)
    : x(_x), y(_y), z(_z)
    {
    }

    // Copying a ref_vec (including from a temporary) re-binds the new
    // object to the same three coordinates. No std::move is involved:
    // a double& member can only bind to an lvalue, and the referenced
    // doubles do not belong to the ref_vec being copied anyway.
    ref_vec (const ref_vec&) = default;

    // Write the coordinates of a temporary raw_vec through the held
    // references. The hope is that the compiler never constructs the
    // raw_vec and instead stores straight into the referenced doubles.
    // Returns *this so assignments can be chained.
    ref_vec& operator= (raw_vec&& raw)
    {
        // Plain copies: moving a double is the same as copying it.
        x = raw.x;
        y = raw.y;
        z = raw.z;
        return *this;
    }

    double& x;
    double& y;
    double& z;
};
// View over vectors stored interleaved in one array:
// x0, y0, z0, x1, y1, z1, ...
class InterleavedVectors
{
    // First coordinate of vector 0; the storage is not owned.
    double* basePtr;

public:
    // Wrap the caller's array starting at p.
    InterleavedVectors (double* p)
    : basePtr(p)
    {
    }

    // References to the x, y and z coordinates of the ith vector, which
    // occupy three consecutive slots starting at index 3*i.
    ref_vec operator[] (int i) const
    {
        const int first = 3*i;
        return ref_vec(basePtr[first], basePtr[first+1], basePtr[first+2]);
    }
};
// View over vectors stored planar: one array holds every x coordinate,
// a second every y and a third every z.
class PlanarVectors
{
    // Start of each coordinate plane; the storage is not owned.
    double* x;
    double* y;
    double* z;

public:
    // Wrap the caller's three coordinate arrays.
    PlanarVectors (double* _x, double* _y, double* _z)
    : x(_x), y(_y), z(_z)
    {
    }

    // References to the x, y and z coordinates of the ith vector, i.e.
    // element i of each plane.
    ref_vec operator[] (int i) const
    {
        return ref_vec(x[i], y[i], z[i]);
    }
};
// Component-wise sum of two ref_vecs. The result has to be a raw_vec
// because operator+ cannot see the l-value it will be assigned to. The
// hope is that this raw_vec never really exists: it is handed straight
// to ref_vec's operator= (raw_vec&&), which after inlining should write
// the three sums directly into the destination's referenced storage.
inline raw_vec operator+ (const ref_vec& v1, const ref_vec& v2)
{
    const double sumX = v1.x + v2.x;
    const double sumY = v1.y + v2.y;
    const double sumZ = v1.z + v2.z;
    return raw_vec(sumX, sumY, sumZ);
}
int main (void)
{
double interleavedStorage[] = {
rand(), rand(), rand(),
rand(), rand(), rand(),
0, 0, 0
};
double planarX[] = {rand(), rand()};
double planarY[] = {rand(), rand()};
double planarZ[] = {rand(), rand()};
// Our vectors use stack based storage
InterleavedVectors interleaved(interleavedStorage);
PlanarVectors planar(planarX, planarY, planarZ);
// We hope this line turns back into the C version. First the
// rhs constructs two ref_vecs (the result of the operator[]).
// But the operator[] is inlined to show the compiler that each
ref_vec
// actually corresponds directly to the storage pointer held by
// storage class, thus we hope it does not really make copies
// of anything. Then operator+ constructs a temporary raw_vec on
// on the rhs. However, it is assigned to another ref_vec, so we
// hope that the inlined ref_vec:perator= (raw_vec&&) function
// will expand and eliminate the need to actually make the
// temporary, and just store the values directly in the referenced
// coordinates held by the lhs ref_vec. And as before, this
// ref_vec is doing nothing more than serving as a temporary
// alias for InterleavedVector::basePtr and will be optimized out
interleaved[2] = interleaved[0] + planar[1];
return (int)(interleavedStorage[0] + interleavedStorage[1] +
interleavedStorage[2]);
}
First of all let me say that I know that the C++ standard never makes
any guarantees about compiler implementation and thus about the
performance of any language feature. That said, r-value references
are clearly being included because they make certain optimization
analysis cases possible to identify, and every compiler will try to
include them.
Anyway, I've been playing around with some code in ConceptGCC using r-
value references and lots of inlined code to write something very
general that should simplify down to the speed of the old C version.
Needless to say, the generated assembly code is about twice as long so
it doesn't work. I'm not sure if that's because the optimizations
aren't in yet, or because I am asking for an unreasonable amount of
intelligence from the compiler (the kind that I can never count on
showing up, even years from now). While this isn't really a C++
language issue per se, many of the advantages of template/generic
programming are predicated on the idea that the compiler can and will
make certain optimizations after it does the inline expansions.
Here is a simple C program which adds the first vector from a list in
interleaved format (x0, y0, z0, x1, y1, z1, ...) to the second vector
from a list in planar format (x0, x1, y0, y1, z0, z1). It adds
interleaved vector 0 to planar vector 1 and stores it in interleaved
vector 2:
#include <stdlib.h>  // rand()

// Adds interleaved vector 0 to planar vector 1 and stores the sum in
// interleaved vector 2, then returns a value derived from that sum.
int main (void)
{
    // Three vectors stored interleaved (x0, y0, z0, x1, y1, z1, ...).
    // Vectors 0 and 1 are random inputs; vector 2 receives the result.
    // rand() returns int; the explicit casts keep the snippet valid when
    // it is compiled as C++ as well, where braced int -> double
    // initialisation is a narrowing conversion.
    double interleavedStorage[] = {
        (double)rand(), (double)rand(), (double)rand(),
        (double)rand(), (double)rand(), (double)rand(),
        0, 0, 0
    };
    // Two vectors stored planar: one array per coordinate.
    double planarX[] = {(double)rand(), (double)rand()};
    double planarY[] = {(double)rand(), (double)rand()};
    double planarZ[] = {(double)rand(), (double)rand()};

    interleavedStorage[6] = interleavedStorage[0] + planarX[1];
    interleavedStorage[7] = interleavedStorage[1] + planarY[1];
    interleavedStorage[8] = interleavedStorage[2] + planarZ[1];

    // Read back the vector that was just written (elements 6..8), so the
    // three additions above feed an observable result; summing elements
    // 0..2 would only read the inputs. Each result element is the sum of
    // two rand() values, so the total can reach 6 * RAND_MAX; dividing by
    // 8 keeps it within int range, where the conversion is well defined.
    return (int)((interleavedStorage[6] + interleavedStorage[7] +
                  interleavedStorage[8]) / 8.0);
}
Here is my C++ program, which allows you to store a vector in any
format. I add some comments to explain what I think is supposed to
happen:
#include <utility>
#include <cstdlib>
// std::move is declared in <utility>, which this program already
// includes, so no hand-written replacement is needed here. Adding our own
// declaration to namespace std is undefined behaviour, and a second
// `move` template there clashes with the library's on every call.
using namespace std;
// raw_vec owns one (x, y, z) triple by value. The intent in this program
// is that a raw_vec only ever appears as a short-lived intermediate which
// the optimizer can remove entirely.
struct raw_vec
{
    double x;
    double y;
    double z;

    // Store the three coordinates as given.
    raw_vec (double _x, double _y, double _z)
    : x{_x}, y{_y}, z{_z}
    {
    }
};
// A ref_vec is a bundle of three references to the x, y and z
// coordinates of a vector whose storage is managed by some other class.
// It owns nothing. The hope is that, after inline expansion, the compiler
// sees that each reference names storage it already knew about and never
// materialises a ref_vec at all.
struct ref_vec
{
    // Bind to three existing coordinates; nothing is copied.
    ref_vec (double& _x, double& _y, double& _z)
    : x(_x), y(_y), z(_z)
    {
    }

    // Copying a ref_vec (including from a temporary) re-binds the new
    // object to the same three coordinates. No std::move is involved:
    // a double& member can only bind to an lvalue, and the referenced
    // doubles do not belong to the ref_vec being copied anyway.
    ref_vec (const ref_vec&) = default;

    // Write the coordinates of a temporary raw_vec through the held
    // references. The hope is that the compiler never constructs the
    // raw_vec and instead stores straight into the referenced doubles.
    // Returns *this so assignments can be chained.
    ref_vec& operator= (raw_vec&& raw)
    {
        // Plain copies: moving a double is the same as copying it.
        x = raw.x;
        y = raw.y;
        z = raw.z;
        return *this;
    }

    double& x;
    double& y;
    double& z;
};
// View over vectors stored interleaved in one array:
// x0, y0, z0, x1, y1, z1, ...
class InterleavedVectors
{
    // First coordinate of vector 0; the storage is not owned.
    double* basePtr;

public:
    // Wrap the caller's array starting at p.
    InterleavedVectors (double* p)
    : basePtr(p)
    {
    }

    // References to the x, y and z coordinates of the ith vector, which
    // occupy three consecutive slots starting at index 3*i.
    ref_vec operator[] (int i) const
    {
        const int first = 3*i;
        return ref_vec(basePtr[first], basePtr[first+1], basePtr[first+2]);
    }
};
// View over vectors stored planar: one array holds every x coordinate,
// a second every y and a third every z.
class PlanarVectors
{
    // Start of each coordinate plane; the storage is not owned.
    double* x;
    double* y;
    double* z;

public:
    // Wrap the caller's three coordinate arrays.
    PlanarVectors (double* _x, double* _y, double* _z)
    : x(_x), y(_y), z(_z)
    {
    }

    // References to the x, y and z coordinates of the ith vector, i.e.
    // element i of each plane.
    ref_vec operator[] (int i) const
    {
        return ref_vec(x[i], y[i], z[i]);
    }
};
// Component-wise sum of two ref_vecs. The result has to be a raw_vec
// because operator+ cannot see the l-value it will be assigned to. The
// hope is that this raw_vec never really exists: it is handed straight
// to ref_vec's operator= (raw_vec&&), which after inlining should write
// the three sums directly into the destination's referenced storage.
inline raw_vec operator+ (const ref_vec& v1, const ref_vec& v2)
{
    const double sumX = v1.x + v2.x;
    const double sumY = v1.y + v2.y;
    const double sumZ = v1.z + v2.z;
    return raw_vec(sumX, sumY, sumZ);
}
int main (void)
{
double interleavedStorage[] = {
rand(), rand(), rand(),
rand(), rand(), rand(),
0, 0, 0
};
double planarX[] = {rand(), rand()};
double planarY[] = {rand(), rand()};
double planarZ[] = {rand(), rand()};
// Our vectors use stack based storage
InterleavedVectors interleaved(interleavedStorage);
PlanarVectors planar(planarX, planarY, planarZ);
// We hope this line turns back into the C version. First the
// rhs constructs two ref_vecs (the result of the operator[]).
// But the operator[] is inlined to show the compiler that each
ref_vec
// actually corresponds directly to the storage pointer held by
// storage class, thus we hope it does not really make copies
// of anything. Then operator+ constructs a temporary raw_vec on
// on the rhs. However, it is assigned to another ref_vec, so we
// hope that the inlined ref_vec:perator= (raw_vec&&) function
// will expand and eliminate the need to actually make the
// temporary, and just store the values directly in the referenced
// coordinates held by the lhs ref_vec. And as before, this
// ref_vec is doing nothing more than serving as a temporary
// alias for InterleavedVector::basePtr and will be optimized out
interleaved[2] = interleaved[0] + planar[1];
return (int)(interleavedStorage[0] + interleavedStorage[1] +
interleavedStorage[2]);
}