Shallow copy and long long double on gcc 3.4.5

Discussion in 'C++' started by Francesco S. Carta, Sep 18, 2009.

  1. Hi there,
    after the posts about dumping objects' raw memory I've played with it
    a bit and I've come to discover that the compiler-created shallow copy
    does some kind of memcopy on the two objects, starting from [object's
    base address] up to [last member address + last member size].

    Meanwhile, I've discovered that the compiler accepts the "long long
    double" type declaration but treats it as "long long int".

    The compiler is my usual MinGW release: gcc 3.4.5.

    Well, this thread is just to show this new snippet of mine (which I
    used to study the layout in memory of differently sized members)
    and to take the chance to receive further good advice from the
    community, which is welcome as always.

    Code:

    -------
    #include <stdint.h>

    #include <algorithm>
    #include <iomanip>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <typeinfo>
    #include <utility>
    #include <vector>

    using namespace std;

    /*
    maps the type T to a readable name:
    returns the plain spelling for the built-in arithmetic types listed
    in the table below; for any other type it returns whatever
    typeid(T).name() reports (implementation defined)

    types are matched through type_info equality rather than by comparing
    name() strings

    there is deliberately no "long long double" entry: that is not a
    valid type, and a compiler that accepts it as an alias of "long long"
    is already served by the "long long" entry
    */
    template<class T> std::string type_of() {
        struct known_type {
            const std::type_info* info;
            const char* name;
        };

        const known_type table[] = {
            { &typeid(bool),               "bool" },
            { &typeid(unsigned char),      "unsigned char" },
            { &typeid(char),               "char" },
            { &typeid(signed char),        "signed char" },
            { &typeid(short),              "short" },
            { &typeid(unsigned short),     "unsigned short" },
            { &typeid(int),                "int" },
            { &typeid(unsigned int),       "unsigned int" },
            { &typeid(long),               "long" },
            { &typeid(unsigned long),      "unsigned long" },
            { &typeid(long long),          "long long" },
            { &typeid(unsigned long long), "unsigned long long" },
            { &typeid(float),              "float" },
            { &typeid(double),             "double" },
            { &typeid(long double),        "long double" }
        };

        const std::type_info& wanted = typeid(T);
        const size_t count = sizeof(table) / sizeof(table[0]);

        for (size_t i = 0; i < count; ++i) {
            if (*table[i].info == wanted) {
                return table[i].name;
            }
        }

        /* not a listed built-in: fall back to the implementation's name */
        return wanted.name();
    }


    /*
    helper for six_types::print():
    writes one line describing obj to os, in the form
      name, 0xADDRESS (p.OFFSET), SIZEB, TYPE[, NB wasted]
    - the "(p.OFFSET)" part is emitted only when obj's address is not
      below parent_addr; OFFSET is the distance from parent_addr
    - the "wasted" part is emitted only when wasted is non-zero
    note: showbase is left set on os after the call
    */
    template<class T> void detail_obj(std::ostream& os,
                                      const std::string& name,
                                      const T& obj,
                                      size_t wasted,
                                      size_t parent_addr = 0) {
        const size_t where = reinterpret_cast<size_t>(&obj);

        os << name << ", ";
        os << std::showbase << std::hex << where;

        const bool at_or_after_parent = !(where < parent_addr);
        if (at_or_after_parent) {
            os << std::dec;
            os << " (p." << (where - parent_addr) << ")";
        }

        /* back to decimal before the size, whichever path was taken */
        os << ", ";
        os << std::dec << sizeof(T) << "B, ";
        os << type_of<T>();

        if (wasted != 0) {
            os << ", " << wasted << "B wasted";
        }

        os << std::endl;
    }

    /*
    helper for six_types::print():
    writes used*2 copies of ch followed by empty*2 blanks, i.e. two
    characters per byte, matching the width of a hex dump
    */
    void hexbytes_w_chars(std::ostream& os,
                          char ch,
                          int used,
                          int empty) {
        const std::string filled(used * 2, ch);
        os << filled;

        const std::string blank(empty * 2, ' ');
        os << blank;
    }

    /*
    helper for six_types::print():
    writes a bracket pair spanning used*2 characters ("[" + blanks + "]")
    followed by empty*2 blanks, i.e. two characters per byte

    used <= 0 writes no brackets and empty <= 0 writes no blanks, so the
    function never asks std::string for a negative (hence huge) length
    */
    void hexbytes_w_brackets(std::ostream& os,
                             int used,
                             int empty) {
        typedef std::string::size_type size_type;

        if (used > 0) {
            /* the two brackets take the place of one byte's two characters */
            const size_type inner = static_cast<size_type>(used - 1) * 2;
            os << "[" << std::string(inner, ' ') << "]";
        }

        if (empty > 0) {
            os << std::string(static_cast<size_type>(empty) * 2, ' ');
        }
    }


    /*
    six_types: holds six members, one per template parameter and declared
    in parameter order, and reports how they sit in memory: address,
    offset from the object, size, and the gap that follows each member
    */
    template<class M0, class M1, class M2, class M3, class M4, class M5>
    class six_types {

        M0 m0;
        M1 m1;
        M2 m2;
        M3 m3;
        M4 m4;
        M5 m5;

        /* absolute distance, in bytes, between the addresses of t and u */
        template<class T, class U>
        static size_t addr_diff(const T& t, const U& u) {
            const size_t a = reinterpret_cast<size_t>(&t);
            const size_t b = reinterpret_cast<size_t>(&u);
            return (a < b) ? (b - a) : (a - b);
        }

    public:

        /* every member is initialised from the literal 0 */
        six_types() : m0(0), m1(0), m2(0), m3(0), m4(0), m5(0) {}

        /* sum of the six members' sizes, without any gap between them */
        size_t used_size() const {
            size_t total = sizeof(M0);
            total += sizeof(M1);
            total += sizeof(M2);
            total += sizeof(M3);
            total += sizeof(M4);
            total += sizeof(M5);
            return total;
        }

        /* assigns k, converted to each member's own type, to all members */
        template<class T>
        void set_to(const T& k) {
            m0 = M0(k);
            m1 = M1(k);
            m2 = M2(k);
            m3 = M3(k);
            m4 = M4(k);
            m5 = M5(k);
        }

        /*
        writes a per-member report and a usage summary to os,
        returns two rows that line up with a hex dump of the object:
        - pair.first: each member drawn with its letter (A..F)
        - pair.second: each member drawn as a bracket pair
        */
        std::pair<std::string, std::string> print(std::ostream& os) const {
            const size_t whole = sizeof(*this);

            const size_t width[6] = {
                sizeof(M0), sizeof(M1), sizeof(M2),
                sizeof(M3), sizeof(M4), sizeof(M5)
            };

            /*
            gap after each member: distance to the next member minus the
            member's own size; for the last member, what is left up to
            the end of the object
            */
            const size_t slack[6] = {
                addr_diff(m0, m1) - width[0],
                addr_diff(m1, m2) - width[1],
                addr_diff(m2, m3) - width[2],
                addr_diff(m3, m4) - width[3],
                addr_diff(m4, m5) - width[4],
                whole - addr_diff(m5, *this) - width[5]
            };

            /* typeid(this) names the pointer type, not the class itself */
            os << "TypeName: " << typeid(this).name() << std::endl;

            const size_t base = reinterpret_cast<size_t>(this);
            detail_obj(os, "A", m0, slack[0], base);
            detail_obj(os, "B", m1, slack[1], base);
            detail_obj(os, "C", m2, slack[2], base);
            detail_obj(os, "D", m3, slack[3], base);
            detail_obj(os, "E", m4, slack[4], base);
            detail_obj(os, "F", m5, slack[5], base);
            os << "---" << std::endl;

            const size_t lost = whole - used_size();

            if (lost == 0) {
                os << "All " << whole << "B used, no space wasted" << std::endl;
            } else {
                os << "Total " << whole << "B, wasted " << lost << "B (";
                os << std::setprecision(2);
                os << 100.0 * lost / whole << "%)" << std::endl;
            }

            os << "---" << std::endl;

            const char letter[] = "ABCDEF";

            std::ostringstream named;
            for (int i = 0; i < 6; ++i) {
                hexbytes_w_chars(named, letter[i], width[i], slack[i]);
            }

            std::ostringstream bracketed;
            for (int i = 0; i < 6; ++i) {
                hexbytes_w_brackets(bracketed, width[i], slack[i]);
            }

            return std::make_pair(named.str(), bracketed.str());
        }
    }; // end of six_types class


    /*
    writes the raw bytes of obj to os as lowercase hex, two digits per
    byte, in address order, with no prefix and no separators

    the stream's formatting flags and fill character are saved first and
    restored afterwards, so the caller's stream is not left in hex mode
    with a '0' fill; right adjustment is forced while dumping so that a
    left-adjusted stream still pads single-digit bytes on the correct side
    */
    template<class T>
    void dump_obj_memory(std::ostream& os, const T& obj) {
        const unsigned char* bytes =
            reinterpret_cast<const unsigned char*>(&obj);

        const std::ios_base::fmtflags saved_flags = os.flags();
        const char saved_fill = os.fill();

        os << std::noshowbase << std::nouppercase << std::hex;
        os << std::right << std::setfill('0');

        for (size_t i = 0; i < sizeof(T); ++i) {
            /* widen to an integer type so the byte prints as a number */
            os << std::setw(2) << static_cast<unsigned int>(bytes[i]);
        }

        os.fill(saved_fill);
        os.flags(saved_flags);
    }

    /*
    convenience overload: runs the stream-based dump_obj_memory() on a
    private string stream and returns what it wrote
    */
    template<class T> std::string dump_obj_memory(const T& obj) {
        std::ostringstream buffer;
        dump_obj_memory(buffer, obj);

        const std::string hex_text = buffer.str();
        return hex_text;
    }

    /**
    overwrites all sizeof(T) bytes of *obj
    - usepattern == true: repeating DE AD BE EF sequence, starting with
      DE at the first byte
    - usepattern == false: every byte is set to c
    WARNING! destroys _ALL_ of the object's data, including anything the
    object relies on internally
    */
    template<class T> void crunch(T* obj,
                                  bool usepattern = true,
                                  uint8_t c = 0) {
        uint8_t* const first = reinterpret_cast<uint8_t*>(obj);
        uint8_t* const last = first + sizeof(T);

        if (!usepattern) {
            for (uint8_t* cur = first; cur != last; ++cur) {
                *cur = c;
            }
            return;
        }

        static const uint8_t pattern[4] = { 0xDE, 0xAD, 0xBE, 0xEF };
        size_t phase = 0;

        for (uint8_t* cur = first; cur != last; ++cur) {
            *cur = pattern[phase];
            phase = (phase + 1) % 4;
        }
    }

    /*
    returns a ruler to print under a hex dump: the numbers from, from+step,
    ... (below to), each left-aligned in a cell of step*2 characters padded
    with ', and with the end of the string overwritten by the value of to

    example: hexbyte_markers(0, 8) == "0'''''''4''''''8"

    returns an empty string when step is 0 or when from >= to (the
    unchecked versions of these cases loop forever or throw)
    */
    std::string hexbyte_markers(size_t from, size_t to, size_t step = 4) {
        if (step == 0 || from >= to) {
            return std::string();
        }

        std::ostringstream label_stream;
        label_stream << to;
        const std::string label = label_stream.str();

        std::ostringstream ruler;
        ruler << std::left << std::setfill('\'');

        size_t i = from;
        for (;;) {
            ruler << std::setw(static_cast<int>(step * 2)) << i;

            /* stop before i + step could reach to (or wrap around) */
            if (to - i <= step) {
                break;
            }
            i += step;
        }

        std::string s = ruler.str();

        /* if the label is wider than the ruler, the label replaces it all */
        const std::string::size_type keep =
            (s.size() > label.size()) ? (s.size() - label.size()) : 0;
        s.replace(keep, std::string::npos, label);
        return s;
    }

    /*
    writes the strings of vs to os in chunks of at most limit characters:
    for each chunk position, one line per string holding that string's
    chunk between two delimiter characters, then an empty line

    a string shorter than the current position contributes an empty chunk;
    nothing is written when limit is 0 or when all strings are empty
    */
    void interleave(std::ostream& os,
                    const std::vector<std::string>& vs,
                    size_t limit = 64,
                    char delimiter = '|') {
        if (limit == 0) {
            return;
        }

        size_t longest = 0;
        for (size_t i = 0, e = vs.size(); i < e; ++i) {
            if (vs[i].size() > longest) {
                longest = vs[i].size();
            }
        }

        size_t pos = 0;
        while (pos < longest) {
            for (size_t i = 0, e = vs.size(); i < e; ++i) {
                os << delimiter;
                /* substr() throws when pos is past the end of the string */
                if (pos < vs[i].size()) {
                    os << vs[i].substr(pos, limit);
                }
                os << delimiter << std::endl;
            }

            os << std::endl;

            /* checked this way round so that pos + limit cannot wrap */
            if (longest - pos <= limit) {
                break;
            }
            pos += limit;
        }
    }

    /*
    test routine for the six_types class: prints the layout report of a
    six_types<M0..M5> object to cout, then the members' rows, a hex dump
    of the object and a ruler, interleaved in chunks of 48 characters
    WARNING! calls crunch() on the object before dumping it,
    read the warnings of the crunch() function
    */

    template<class M0, class M1, class M2, class M3, class M4, class M5>
    void test(const std::string& s) {

        six_types<M0, M1, M2, M3, M4, M5> six;

        std::cout << std::endl;
        std::cout << std::string(64, '*') << std::endl;
        std::cout << std::endl;
        std::cout << "# test(\"" << s << "\")" << std::endl;

        const std::pair<std::string, std::string> layout = six.print(std::cout);

        /*
        fill the whole object with the marker pattern, then assign the
        members again: the dump below shows which bytes that rewrote
        */
        crunch(&six);
        six.set_to(0);

        std::vector<std::string> rows;
        rows.push_back(layout.first);
        rows.push_back(layout.second);
        rows.push_back(dump_obj_memory(six));
        rows.push_back(hexbyte_markers(0, sizeof(six)));

        std::cout << "\nMembers dislocation:\n" << std::endl;

        interleave(std::cout, rows, 48);

        std::cout << std::endl;
        std::cout << std::endl;
    }

    /*
    three members (1 byte, 8 bytes, 1 byte) with a hand-written copy
    assignment that copies member by member; same members as
    shallow_copy, which relies on the compiler-generated assignment
    */
    struct assign_copy {
        uint8_t a;
        uint64_t b;
        uint8_t c;

        /* all three members start from the same value */
        assign_copy(uint8_t value = 0) : a(value), b(value), c(value) {}

        assign_copy& operator=(const assign_copy& other) {
            this->a = other.a;
            this->b = other.b;
            this->c = other.c;
            return *this;
        }
    };

    /*
    three members (1 byte, 8 bytes, 1 byte) and no user-declared copy
    operations: copies go through what the compiler generates
    */
    struct shallow_copy {
        uint8_t a;
        uint64_t b;
        uint8_t c;

        /* all three members start from the same value */
        shallow_copy(uint8_t value = 0) : a(value), b(value), c(value) {}
    };

    int main() {

        /*
        members A and B have the same data members; only B has a
        hand-written copy assignment
        */
        test<shallow_copy, assign_copy, char, char, char, char>("shallow copy");

        /*
        your compiler may choke on the following "long long double" type,
        remove the statement in such case
        */
        std::cout << "type_of<long long double>() == "
                  << type_of<long long double>() << std::endl;

        return 0;
    }
    -------

    Output:

    -------

    ****************************************************************

    # test("shallow copy")
    TypeName: PK9six_typesI12shallow_copy11assign_copyccccE
    A, 0x23fe60 (p.0), 24B, 12shallow_copy
    B, 0x23fe78 (p.24), 24B, 11assign_copy
    C, 0x23fe90 (p.48), 1B, char
    D, 0x23fe91 (p.49), 1B, char
    E, 0x23fe92 (p.50), 1B, char
    F, 0x23fe93 (p.51), 1B, char, 4B wasted
    ---
    Total 56B, wasted 4B (7.1%)
    ---

    Members dislocation:

    |AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA|
    |[                                              ]|
    |004a4500f04a4500000000000000000000adbeefdeadbeef|
    |0'''''''4'''''''8'''''''12''''''16''''''20''''''|

    |BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB|
    |[                                              ]|
    |00adbeefdeadbeef000000000000000000adbeefdeadbeef|
    |24''''''28''''''32''''''36''''''40''''''44''''''|

    |CCDDEEFF        |
    |[][][][]        |
    |00000000deadbeef|
    |48''''''52''''56|



    type_of<long long double>() == long long
    -------

    The memory dump of the A member (shallow_copy) shows a "f04a4500"
    pattern in the first chunk of unused memory (range [1-7]) while the
    second chunk (range [17-23]) shows the crunched "deadbeef" pattern.

    The B member (assign_copy) instead shows the "deadbeef" pattern in
    both chunks of unused memory.

    This all seems to confirm that the compiler-generated shallow copy
    does some kind of memcopy instead of performing a member to member
    assignment.

    Thanks a lot for your attention,
    best regards,
    Francesco
    --
    Francesco S. Carta, hobbyist
    http://fscode.altervista.org
    Francesco S. Carta, Sep 18, 2009
    #1
    1. Advertising

  2. On 18 Set, 12:44, "Francesco S. Carta" <> wrote:

    [snip]

    Ehm... some functions above are a bit silly, they use the input
    without checking for its validity, leading to infinite loops or huge
    output... hexbytes_w_brackets(), hexbyte_markers() and interleave()
    fixed accordingly. Never mind.

    Cheers,
    Francesco
    --
    Francesco S. Carta, hobbyist
    http://fscode.altervista.org
    Francesco S. Carta, Sep 18, 2009
    #2
    1. Advertising

  3. Francesco S. Carta

    James Kanze Guest

    On Sep 18, 12:44 pm, "Francesco S. Carta" <> wrote:

    > after the posts about dumping objects' raw memory I've played
    > with it a bit and I've come to discover that the
    > compiler-created shallow copy does some kind of memcopy on the
    > two objects, starting from [object's base address] up to [last
    > member address + last member size].


    No. The compiler-created copy (assignment or copy constructor)
    invokes the copy constructor for each of its contained elements,
    starting with the direct base classes, and then the members,
    both in the declared order. (In the absence of virtual
    inheritance, which introduces some quirks, and has some
    unspecified behavior in the case of assignment.)

    > Meanwhile, I've discovered that the compiler accepts the "long
    > long double" type declaration but treats it as "long long
    > int".


    That's an error in the compiler. There's no place in the
    grammar where the sequence of tokens "long", "long", "double"
    would be legal.

    --
    James Kanze
    James Kanze, Sep 19, 2009
    #3
  4. On 19 Set, 12:22, James Kanze <> wrote:
    > On Sep 18, 12:44 pm, "Francesco S. Carta" <> wrote:
    >
    > > after the posts about dumping objects' raw memory I've played
    > > with it a bit and I've come to discover that the
    > > compiler-created shallow copy does some kind of memcopy on the
    > > two objects, starting from [object's base address] up to [last
    > > member address + last member size].

    >
    > No. The compiler created copy (assignment or copy constructor)
    > invokes the copy constructor for each of its contained elements,
    > starting with the direct base classes, and then the members,
    > both in the declared order. (In the absense of virtual
    > inheritance, which introduces some quirks, and has some
    > unspecified behavior in the case of assignment.)


    Hi James, thanks for dropping in.

    After a while I came to the same conclusion as you (if it were
    as I supposed - that is, _always_ doing some sort of memcopy - then
    string members, for instance, would have pointed to the same resource,
    which is not the case).

    But then, why am I reading that different pattern in the first unused
    chunk of memory, in the A member, there in my original post? Shouldn't
    there be a "deadbeef" pattern just like for the B member?

    The only difference between those two structs is that one has an
    explicit assignment operator implemented.

    Is it possible that the compiler-generated copy behaves differently
    with members of built-in type?

    > > Meanwhile, I've discovered that the compiler accepts the "long
    > > long double" type declaration but treats it as "long long
    > > int".

    >
    > That's an error in the compiler. There's no place in the
    > grammar where the sequence of tokens "long", "long", "double"
    > would be legal.


    I've searched for it and it happens to be a known bug - maybe it has
    been already fixed in later releases.

    Thanks again,
    Francesco
    --
    Francesco S. Carta, hobbyist
    http://fscode.altervista.org
    Francesco S. Carta, Sep 19, 2009
    #4
  5. On 19 Set, 13:39, "Francesco S. Carta" <> wrote:
    > On 19 Set, 12:22, James Kanze <> wrote:
    >
    > > On Sep 18, 12:44 pm, "Francesco S. Carta" <> wrote:

    >
    > > > after the posts about dumping objects' raw memory I've played
    > > > with it a bit and I've come to discover that the
    > > > compiler-created shallow copy does some kind of memcopy on the
    > > > two objects, starting from [object's base address] up to [last
    > > > member address + last member size].

    >
    > > No.  The compiler created copy (assignment or copy constructor)
    > > invokes the copy constructor for each of its contained elements,
    > > starting with the direct base classes, and then the members,
    > > both in the declared order.  (In the absense of virtual
    > > inheritance, which introduces some quirks, and has some
    > > unspecified behavior in the case of assignment.)

    >
    > Hi James, thanks for dropping in.
    >
    > After a while I've come to the same conclusion of yours (if it were
    > like I supposed - that is, doing _always_ some sort of memcopy - then
    > string members, for instance, would have pointed to the same resource,
    > which it isn't).
    >
    > But then, why am I reading that different pattern in the first unused
    > chunk of memory, in the A member, there in my original post? Shouldn't
    > there be a "deadbeef" pattern just like for the B member?
    >
    > The only difference between those two structs is that one has an
    > explicit assignment operator implemented.
    >
    > Is it possible that the compiler-generated copy behaves differently
    > with members of built-in type?


    For a more evident example, try adding the following instructions to
    the main() of the program I posted above:

    -------
    /* two objects whose raw bytes are made recognisably different */
    shallow_copy sh1, sh2;
    /* sh1: every byte follows the repeating DE AD BE EF pattern */
    crunch(&sh1);
    /* sh2: every byte is set to 0xEE */
    crunch(&sh2, false, 0xEE);
    /* reset only the declared members of sh1 */
    sh1.a = sh1.b = sh1.c = 0;
    /* the compiler-generated copy assignment under observation */
    sh2 = sh1;
    /* dump both objects' raw bytes, one line each, for comparison */
    cout << dump_obj_memory(sh1) << endl;
    cout << dump_obj_memory(sh2) << endl;
    -------

    On my gcc 3.4.5 I get this output:

    -------
    00adbeefdeadbeef000000000000000000adbeefdeadbeef
    00adbeefdeadbeef000000000000000000eeeeeeeeeeeeee
    -------

    Seems clear that the compiler-created copy assignment copies unused
    memory.

    It's also evident that this does not have very much to do with C++, sorry.

    Cheers,
    Francesco
    --
    Francesco S. Carta, hobbyist
    http://fscode.altervista.org
    Francesco S. Carta, Sep 19, 2009
    #5
  6. Francesco S. Carta

    James Kanze Guest

    On Sep 19, 1:39 pm, "Francesco S. Carta" <> wrote:
    > On 19 Set, 12:22, James Kanze <> wrote:


    > > On Sep 18, 12:44 pm, "Francesco S. Carta"
    > > <> wrote:


    > > > after the posts about dumping objects' raw memory I've
    > > > played with it a bit and I've come to discover that the
    > > > compiler-created shallow copy does some kind of memcopy on
    > > > the two objects, starting from [object's base address] up
    > > > to [last member address + last member size].


    > > No. The compiler created copy (assignment or copy
    > > constructor) invokes the copy constructor for each of its
    > > contained elements, starting with the direct base classes,
    > > and then the members, both in the declared order. (In the
    > > absense of virtual inheritance, which introduces some
    > > quirks, and has some unspecified behavior in the case of
    > > assignment.)


    > After a while I've come to the same conclusion of yours (if it
    > were like I supposed - that is, doing _always_ some sort of
    > memcopy - then string members, for instance, would have
    > pointed to the same resource, which it isn't).


    > But then, why am I reading that different pattern in the first
    > unused chunk of memory, in the A member, there in my original
    > post? Shouldn't there be a "deadbeef" pattern just like for
    > the B member?


    I didn't look at your example in detail; it was too long and I
    don't have that much time at present. But if it is a question
    of padding bytes, their values are unspecified (and
    theoretically at least, may even result in trapping values if read
    as a type larger than a byte). The compiler has no constraints
    with regards to them. And it is a usual "optimization" to use
    something like memcpy for classes which have no non-trivial
    constructors, or even for parts of the class; e.g. the compiler
    might call the copy constructor of a base class, then use memcpy
    for the data of the derived class, if all of the data had
    trivial copy constructors.

    What you are guaranteed is that the values of the declared bases
    and members will be the same as if the compiler had called the
    copy constructor. Beyond that, the "as if" rule applies, as it
    does everywhere.

    > The only difference between those two structs is that one has
    > an explicit assignment operator implemented.


    > Is it possible that the compiler-generated copy behaves
    > differently with members of built-in type?


    Not with regards to the values of members and bases. The
    results must be the same. On the other hand, how the compiler
    arrives at those results is its business, and what happens to
    any padding in the process is unspecified.

    --
    James Kanze
    James Kanze, Sep 20, 2009
    #6
  7. On 20 Set, 09:44, James Kanze <> wrote:
    > On Sep 19, 1:39 pm, "Francesco S. Carta" <> wrote:
    >
    >
    >
    > > On 19 Set, 12:22, James Kanze <> wrote:
    > > > On Sep 18, 12:44 pm, "Francesco S. Carta"
    > > > <> wrote:
    > > > > after the posts about dumping objects' raw memory I've
    > > > > played with it a bit and I've come to discover that the
    > > > > compiler-created shallow copy does some kind of memcopy on
    > > > > the two objects, starting from [object's base address] up
    > > > > to [last member address + last member size].
    > > > No.  The compiler created copy (assignment or copy
    > > > constructor) invokes the copy constructor for each of its
    > > > contained elements, starting with the direct base classes,
    > > > and then the members, both in the declared order.  (In the
    > > > absense of virtual inheritance, which introduces some
    > > > quirks, and has some unspecified behavior in the case of
    > > > assignment.)

    > > After a while I've come to the same conclusion of yours (if it
    > > were like I supposed - that is, doing _always_ some sort of
    > > memcopy - then string members, for instance, would have
    > > pointed to the same resource, which it isn't).
    > > But then, why am I reading that different pattern in the first
    > > unused chunk of memory, in the A member, there in my original
    > > post? Shouldn't there be a "deadbeef" pattern just like for
    > > the B member?

    >
    > I didn't look at your example in detail; it was too long and I
    > don't have that much time at present.  But if it is a question
    > of padding bytes, their values are unspecified (and
    > theoretically at least, may even result trapping values if read
    > as a type larger than a byte).  The compiler has no constraints
    > with regards to them.  And it is a usual "optimization" to use
    > something like memcpy for classes which have no non-trivial
    > constructors, or even for parts of the class; e.g. the compiler
    > might call the copy constructor of a base class, then use memcpy
    > for the data of the derived class, if all of the data had
    > trivial copy constructors.
    >
    > What you are guaranteed is that the values of the declared bases
    > and members will be the same as if the compiler had called the
    > copy constructor.   Beyond that, the "as if" rule applies, as it
    > does everywhere.
    >
    > > The only difference between those two structs is that one has
    > > an explicit assignment operator implemented.
    > > Is it possible that the compiler-generated copy behaves
    > > differently with members of built-in type?

    >
    > Not with regards to the values of members and bases.  The
    > results must be the same.  On the other hand, how the compiler
    > arrives at those results is its business, and what happens to
    > any padding in the process is unspecified.


    Thanks for the further details and no problem about not thoroughly
    reading my code - actually the problem is that I added my remarks at
    the bottom but I didn't announce them at the top, 'twas fairly easy to
    overlook them.

    I know I have too much spare time on my hands; I don't really
    expect others to necessarily be in the same situation.

    Have a good time,
    Francesco
    --
    Francesco S. Carta, hobbyist
    http://fscode.altervista.org
    Francesco S. Carta, Sep 20, 2009
    #7
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. VisionSet
    Replies:
    8
    Views:
    4,875
    Tris Orendorff
    Apr 29, 2004
  2. Alex
    Replies:
    2
    Views:
    1,202
  3. Replies:
    1
    Views:
    529
    Victor Bazarov
    Jun 24, 2005
  4. Replies:
    26
    Views:
    2,089
    Roland Pibinger
    Sep 1, 2006
  5. Replies:
    2
    Views:
    2,203
    John Harrison
    May 26, 2007
Loading...

Share This Page