Is there in memory tree structure faster than std::map?

Melzzzzz · Aug 11, 2012

I have tried with AA tree (faster search slower insert), Btree (faster
search but slow erasing), general balanced tree (very slow inserts),
and finally my favorite Treap is slower for inserts and search
because it is not well balanced.

So I implemented scapegoat tree perfect balancing rebuilding of tree
during inserts for Treap, which repairs search but slows down insert
somewhat.
Sadly nothing I tried is faster on average than red black tree.

Here is my implementation of Treap with benchmark program so
if anyone wants to try I'd be rather interested to see results.

// bench program
// it is fair, as does searches after insert/erase operations,
// so in that case tree is least balanced

#include "Treap.h"
#include <map>
#include <vector>
#include <algorithm>
#include <cstdlib>
#include <iostream>
const int N = 100000;

int main()
{
std::vector<int> rnd1(N);
std::vector<int> rnd2(N);
for(int i = 0; i <N; ++i)
{
rnd1=i;
rnd2=i;
}
double sumtreap = 0,summap=0;
Treap<int,int> treap;
std::map<int,int> map;
clock_t begin,end;
double res;
for(int j = 0; j<3; ++j)
{
srand(time(0));
std::random_shuffle(rnd1.begin(),rnd1.end());
std::random_shuffle(rnd2.begin(),rnd2.end());
std::cout<<"------------------------------------\n";
begin = clock();
for(int i=0;i<N;++i)
{
treap[rnd1]++;
treap.erase(rnd2);
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
sumtreap += res;
std::cout<<"treap insert/erase time "<<res<< "\n";

begin = clock();
for(int i=0;i<N;++i)
{
map[rnd1]++;
map.erase(rnd2);
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
summap += res;
std::cout<<"map insert/erase time "<<res<< "\n\n";

begin = clock();
for(int i=0;i<N;++i)
{
treap.find(rnd2);
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
sumtreap += res;
std::cout<<"treap search time "<<res<< "\n";

begin = clock();
for(int i=0;i<N;++i)
{
map.find(rnd2);
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
summap += res;
std::cout<<"map search time "<<res<< "\n\n";

begin = clock();
for(int i=0;i<N;++i)
{
treap[rnd1]++;
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
sumtreap += res;
std::cout<<"treap insert time "<<res<< "\n";

begin = clock();
for(int i=0;i<N;++i)
{
map[rnd1]++;
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
summap += res;
std::cout<<"map insert time "<<res<< "\n\n";

begin = clock();
for(int i=0;i<N/2;++i)
{
treap.erase(rnd2);
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
sumtreap += res;
std::cout<<"treap erase time "<<res<< "\n";

begin = clock();
for(int i=0;i<N/2;++i)
{
map.erase(rnd2);
}
end = clock();
res = ((end-begin)/double(CLOCKS_PER_SEC));
summap += res;
std::cout<<"map erase time "<<res<< "\n\n";
}
std::cout<<"------------------------------------";
std::cout<<"\ntreap time "<<sumtreap<<"\n";
std::cout<<"\nmap time "<<summap<<"\n";
}

// Treap.h
#ifndef TREAP_H
#define TREAP_H
#include <functional>
#include <cstddef>
#include <utility>
#include <cassert>
#include <ctime>
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <cstdlib>

template <class K,class V, class Compare = std::less<K> >
class Treap{
public:
Treap(unsigned seed = time(0))
: root_(0),size_(0),counter_(0), seed_(seed),inserts_(0),max_size_(0)
{
}
~Treap()
{
delete root_;
}
size_t size()const{ return size_; }
std:air<K,V>& insert(const std:air<K,V>& arg)
{
return insert_priv(arg);
}
V& operator[](const K& k)
{
return insert(std:air<K,V>(k,V())).second;
}
V* find(const K& k)const;
size_t depth()const
{
size_t fd,ld;
first(root_,fd);
last(root_,ld);
if(fd>ld)return fd;
else return ld;
}
void erase(const K& key)
{
remove(key);
}
std::string toString()const
{
std::string tmp;
if(root_ == 0)tmp.append("Tree has no nodes");
else
{
tmp = toString(root_,"",true);
}
return tmp;
}
bool validate()const
{
return validate(root_);
}
size_t counter()const
{
return counter_;
}
void reset_counter()
{
counter_=0;
}
void clear()
{
delete root_;
size_ = 0;
root_ = 0;
}
void perfect_balance()
{
perfect_balance(root_);
}
template <class F>
void for_each(F f)
{
for_each(root_,f);
}
void weight(size_t& left,size_t& right)
{
if(!root_)
{
left=right=0;
return;
}
left = weight(root_->left_);
right = weight(root_->right_);
}
private:
struct Node{
Node(const std:air<K,V>& data, Treap& trp)
: parent_(0),left_(0),right_(0),priority_(trp.prn()),data_(data)
{
}
~Node()
{
delete left_; delete right_;
}
Node* rotate_left()
{
Node* n = left_;
//std::cout<<"rotate left\n"<<toString(this,"",true)<<"\n";
if(n == 0)
{
return 0;
}

left_ = n->right_;
if(left_)left_->parent_ = this;

n->right_ = this;
if(parent_)
{
if(parent_->left_ == this)
{
parent_->left_ = n;
n->parent_ = parent_;
}
else
{
if(parent_->right_ != this)
{
std::cout<<"rotate right failed\nchild\n"<<toString(this,"",true)<<"\nparent\n"<<toString(parent_,"",true)<<"\n";
abort();
}
parent_->right_ = n;
n->parent_ = parent_;
}
}
else
{
n->parent_ = 0;
}
parent_ = n;

return n;
}
Node* rotate_right()
{
Node* n = right_;
//std::cout<<"rotate right\n"<<toString(this,"",true)<<"\n";
if(n == 0)
{
return 0;
}
right_ = n->left_;
if(right_)right_->parent_ = this;

n->left_ = this;
if(parent_)
{
if(parent_->left_ == this)
{
parent_->left_ = n;
n->parent_ = parent_;
}
else
{
if(parent_->right_ != this)
{
std::cout<<"rotate right failed\nchild\n"<<toString(this,"",true)<<"\nparent\n"<<toString(parent_,"",true)<<"\n";
abort();
}
parent_->right_ = n;
n->parent_ = parent_;
}
}
else
{
n->parent_ = 0;
}
parent_ = n;

return n;
}
Node *parent_,*left_,*right_;
unsigned priority_;
std:air<K,V> data_;
};
size_t weight(Node* n)
{
if(!n)return 0;
return weight(n->left_)+weight(n->right_)+1;
}
template <class F>
void for_each(Node* n, F f)
{
if(!n)return;
for_each(n->left_,f);
f(n->data_);
for_each(n->right_,f);
}
unsigned prn()
{
return rand_r(&seed_);
}
Node* first(Node* n,size_t& depth)const
{
Node *rc = 0;
depth = 0;
while(n)
{
++depth;
rc = n;
n = n->left_;
}
return rc;
}
Node* last(Node* n,size_t& depth)const
{
Node* rc = 0;
depth = 0;
while(n)
{
++depth;
rc = n;
n = n->right_;
}
return rc;
}
std:air<K,V>& insert_priv(const std:air<K,V>& ins_value,Node* node =0)
{
if(root_ == 0)
{
if(!node)
root_ = new Node(ins_value,*this);
else
{
root_ = node;
node->parent_ = node->left_ = node->right_ = 0;
}
++size_;
return root_->data_;
}
Compare cmp;
Node* n = root_;
Node *rc = 0, *prev = 0;
bool ret;
while(n)
{
prev = n;
ret = cmp(ins_value.first,n->data_.first);
if(ret)
{
n=n->left_;
}
else
{
rc = n;
n = n->right_;
}
}
if(rc && !cmp(rc->data_.first,ins_value.first))
{
return rc->data_;
}

if(ret)
{
if(!node)
{
rc = prev->left_ = new Node(ins_value,*this);
}
else
{
rc = prev->left_ = node;
rc->parent_ = rc->left_ = rc->right_ = 0;
}
prev->left_->parent_ = prev;
}
else
{
if(!node)
{
rc = prev->right_ = new Node(ins_value,*this);
}
else
{
rc = prev->right_ = node;
rc->parent_ = rc->left_ = rc->right_ = 0;
}
prev->right_->parent_ = prev;
}

if(!node && inserts_>max_size_)
{
inserts_ = 0;
max_size_ = size_*2;
perfect_balance();
}
else
{
n = rc;
rebalance_up(n);
++size_;
++inserts_;
}
return rc->data_;
}
void remove(const K& rem_value)
{
Compare cmp;
Node* rc = 0,*n = root_;
while(n)
{
bool ret = cmp(rem_value,n->data_.first);
if(ret)
{
n = n->left_;
}
else
{
rc = n;
n = n->right_;
}
}
if(!rc || cmp(rc->data_.first,rem_value))return;

Node* reb = 0;
while(rc->left_ && rc->right_)
{
Node* n = rc->rotate_right();
if(root_ == rc)root_ = n;
if(!reb && n->left_ && n->left_->priority_ < n->priority_)reb = n;
}
Node** parent_node = 0;
if(rc->parent_)
parent_node = ((rc->parent_->left_ == rc)?&rc->parent_->left_:&rc->parent_->right_);
if(rc->left_ == 0 && rc->right_ == 0)
{
if(parent_node)*parent_node = 0;
else root_ = 0;
}
else
if(rc->left_ == 0)
{
if(parent_node)
{
*parent_node = rc->right_;
rc->right_->parent_ = rc->parent_;
}
else
{
root_ = rc->right_;
rc->right_->parent_ = 0;
}
rc->right_ = 0;
}
else
if(rc->right_ == 0)
{
if(parent_node)
{
*parent_node = rc->left_;
rc->left_->parent_ = rc->parent_;
}
else
{
root_ = rc->left_;
rc->left_->parent_ = 0;
}
rc->left_ = 0;
}
delete rc;
--size_;
if(max_size_>0)--max_size_;
rebalance_left(reb);

}
bool validate(const Node* node)const
{
if(!node)return true;
Compare cmp;
if(node->left_ && !cmp(node->left_->data_.first,node->data_.first))
{
return false;
}
if(node->right_ && !cmp(node->data_.first,node->right_->data_.first))
{
return false;
}
if(node->left_ && node->left_->parent_ != node)
{
return false;
}
if(node->right_ && node->right_->parent_ != node)
{
return false;
}
if(node->left_ && node->priority_ > node->left_->priority_)
{
return false;
}
if(node->right_ && node->priority_ > node->right_->priority_)
{
return false;
}
bool rc1 = validate(node->left_);
bool rc2 = validate(node->right_);
return rc1 && rc2;

}
void rebalance_left(Node* node)
{
if(!node)return;
rebalance_left(node->left_);
while(node->left_ && node->left_->priority_ < node->priority_)
{
Node* n = node->rotate_left();
if(node == root_)root_ = n;
}
}
void rebalance_right(Node* node)
{
if(!node)return;
rebalance_right(node->right_);
while(node->right_ && node->right_->priority_ < node->priority_)
{
Node* n = node->rotate_right();
if(node == root_)root_ = n;
}
}
void rebalance_up(Node* n)
{
while(n->parent_ && n->priority_ < n->parent_->priority_)
{
if(n->parent_->left_ == n)
n = n->parent_->rotate_left();
else
n = n->parent_->rotate_right();
if(n->parent_ == 0)root_ = n;
}
}
struct P{
P(Treap& t):t(t),weight(0){v.reserve(t.size());}
void operator()(Node* n)
{
v.push_back(n);
++weight;
}
Treap& t;
std::vector<Node*> v;
size_t weight;
};
template <class F>
void for_each_node(Node* n, F& f)
{
if(!n)return;
for_each_node(n->left_,f);
f(n);
for_each_node(n->right_,f);
}
void perfect_balance(Node *n)
{
if(!n)return;
P p(*this);
for_each_node(n,p);
Node** link = 0, *parent=0;
if(n->parent_)
{
if(n->parent_->left_ == n)
link = &n->parent_->left_;
else
link = &n->parent_->right_;
size_ -= p.weight;
parent = n->parent_;
}
else
{
link = &root_;
size_ = 0;
}
Treap<K,V,Compare> tmp;
unsigned prio = parent?parent->priority_+100000:0;
tmp.insert_divide(0,p.v.size(),p.v,prio);
*link = tmp.root_;
if(parent)tmp.root_->parent_ = parent;
size_ += tmp.size_;
tmp.root_ = 0;
}
void insert_divide(size_t b, size_t e,std::vector<Node*>& v,size_t prio)
{
if(e == b)return;
Node* p = v[(b+e)/2];
p->priority_ = prio;
insert_priv(p->data_,p);
insert_divide(b,(e+b)/2,v,prio+100000);
insert_divide((e+b)/2+1,e,v,prio+100000);
}
static std::string toString(const Node* node, const std::string& prefix, bool isTail)
{
std::string tmp;
tmp.append(prefix).append(isTail ? "â””â”€â”€ " : "â”œâ”€â”€ ");

if(!node)
{
tmp.append(" null");
return tmp;
}

std:stringstream oss;
const std:air<K,V>& v = node->data_;
oss<<"p:"<<node->priority_<<" ("<<v.first<<","<<v.second<<")";
tmp.append(oss.str());
oss.str("");
tmp.append("\n");

tmp.append(toString(node->left_,prefix + (isTail?" ":"â”‚ "),false));
tmp.append("\n");

tmp.append(toString(node->right_,prefix + (isTail ? " " : "â”‚ "),true));
return tmp;
}
public:
class iterator{
public:
iterator(Node* n):node_(n){}
std:air<K,V>& operator*()const{ return node_->data_; }
std:air<K,V>* operator->()const{ return &node_->data_; }
bool operator==(const iterator& other)const
{
return node_ == other.node_;
}
bool operator!=(const iterator& other)const
{
return !(*this == other);
}
iterator& operator++()
{
if(node_->right_ != 0)
{
node_ = node_->right_;
while(node_->left_ != 0)
{
node_ = node_->left_;
}
}
else
{
Node* tmp = node_->parent_;
while(tmp && node_ == tmp->right_)
{
node_ = tmp;
tmp = tmp->parent_;
}
if(node_->right_ != tmp)
{
node_ = tmp;
}
}
return *this;
}
private:
Node* node_;
};
iterator begin()
{
size_t depth = 0;
return iterator(first(depth));
}
iterator end()const
{
return iterator(0);
}
private:
Node* root_;
size_t size_,counter_;
unsigned seed_;
unsigned inserts_,max_size_;
bool flip_;
};

template <class K,class V, class Compare>
V* Treap<K,V,Compare>::find(const K& k)const
{
Compare cmp;
Node* n = root_,*tmp = 0;
V* rc = 0;
while(n)
{
bool ret = cmp(k,n->data_.first);
if(ret)
{
n = n->left_;
}
else
{
tmp = n;
n = n->right_;
}
}
if(tmp && !cmp(tmp->data_.first,k))
{
rc = &tmp->data_.second;
}
return rc;
}

#endif

Melzzzzz · Aug 11, 2012

const int N = 100000;

Increase N to at least one million to see the difference.

Juha Nieminen · Aug 12, 2012

Melzzzzz said:
Sadly nothing I tried is faster on average than red black tree.

The biggest bottleneck in adding and removing elements to/from a tree is
not the balancing. It's memory allocation.

Suppose you do just this:

std::set<int> s;
for(int i = 0; i < 100000000; ++i) s.insert(i);

Where do you think those insertions spend most of the time? That's right:
Memory allocation.

Depending on the system I have measured from about 50% all the way up
to about 90% of the time in those insertions is spent purely in memory
allocation.

That's one of the major reasons why hash tables are faster than trees:
They allocate memory in chunks rather than allocating each element
individually. (Ok, there are like a hundred different ways of implementing
a hash table, but most of them do it by using arrays.)

Melzzzzz · Aug 12, 2012

The biggest bottleneck in adding and removing elements to/from a tree
is not the balancing. It's memory allocation.

Suppose you do just this:

std::set<int> s;
for(int i = 0; i < 100000000; ++i) s.insert(i);

Where do you think those insertions spend most of the time? That's
right: Memory allocation.

Depending on the system I have measured from about 50% all the way up
to about 90% of the time in those insertions is spent purely in memory
allocation.

That's one of the major reasons why hash tables are faster than trees:
They allocate memory in chunks rather than allocating each element
individually.

Well yes and no. Hash tables are arrays of arrays or arrays of lists,
but they don't store nodes directly in array/vector rather pointers.
Number of allocations for nodes is same as for trees + vector.
Trick is that hash insertion is O(1) while erasing is same
as search O(1) on average, but worst case O(n).

(Ok, there are like a hundred different ways of

implementing a hash table, but most of them do it by using arrays.)

Yes

Juha Nieminen · Aug 13, 2012

Leigh Johnston said:
Three words: custom pool allocator.

It can be difficult to match the compiler's (well, libc's) own allocator,
unless you are willing to make compromises eg. in terms of thread safety.

From my experiments I think that the major reason (or one of the major
reasons) for the slowness of the standard allocator is that it's
thread-safe. If you make a similar allocator that doesn't care about
thread-safety, it can easily be twice as fast, if not even more. (Of
course another option is to try to make a fast lock-free allocator...)

Another major problem is that the standard allocator isn't (and really
can't be) very cache-friendly: As blocks are allocated and deallocated
during the execution of the program, the memory will become more and more
fragmented, and the allocation of subsequent blocks more and more
randomized. This is a cache-killer. If you want to fix this, you'll have
to find some way of defragmenting and compacting the memory once in a
while, which can be really difficult due to how a typical C/C++ program
works.

Juha Nieminen · Aug 13, 2012

Scott Lurndal said:
A custom pool allocator (defined: an allocator that returns fixed sized
elements from a pre-allocated array of elements) has a fixed cost O(1) for
both allocation and deallocation. One cannot get more efficient.

I can make an allocator that does allocations in O(1) and is a million
times slower than the standard allocator. "O(1)" tells absolutely nothing.

Adding thread safety to a pool allocator is trivial and has no impact on
performance (in the non-contended case).

Adding thread safety is not trivial and it will have a big impact on its
performance (unless you manage to create a lock-free, wait-free allocator,
which is *far* from being trivial).

What makes you think that locking has no impact in performance? The whole
industry is researching lock-free algorithms like mad precisely because
locking is a really heavy operation. Why would they otherwise?

This seems unlikely.

"Seems unlikely." Well argumented. Care to back that gut feeling up with
something more concrete?

I have done some actual measurements.

Can you elaborate on this? How, exactly does this "kill" a cache?

It's just an expression. It means that operating on a heavily fragmented
memory causes lots of cache misses, nullifying the benefits of the cache,
killing performance.

A modern set-associative cache is designed to operate efficiently with
random data placement. Modern processor prefetching logic handles strided
accesses quite well.

The processor cannot guess in advance where the memory allocator is going
to jump next. If it jumps to an uncached part of RAM, then it will be
slow.

It seems to me that what you are saying is that the standard allocator is
not slow because of it being thread-safe and cache-unfriendly. If not, then
please explain to me your hypothesis of why it's slow.

C'est what?

If you don't understand what memory compaction is, and why it's so hard to
do in a C++ program, then perhaps you have no business in discussing why
memory allocators are so slow and how they can be improved.

Jorgen Grahn · Aug 13, 2012

You've not presented any concrete data to support your conclusion that
"the slowness of the standard allocator is caused by thread-safety".

Please provide the methods used to measure and the final measurements.

And/or references to others saying the same thing. I have to agree:
it sounds unlikely, and I'd like to know how much truth there is in
it.

Unlikely because if I look at the Linux side of things, plenty of
programmers don't use threads, and intensely dislike threads. Seems
unlikely that they'd really tolerate a significant performance hit in
the C library for such a thing.

/Jorgen

Melzzzzz · Aug 13, 2012

It can be difficult to match the compiler's (well, libc's) own
allocator, unless you are willing to make compromises eg. in terms of
thread safety.

From my experiments I think that the major reason (or one of the major
reasons) for the slowness of the standard allocator is that it's
thread-safe. If you make a similar allocator that doesn't care about
thread-safety, it can easily be twice as fast, if not even more. (Of
course another option is to try to make a fast lock-free allocator...)

I think that standard allocator usually uses thread cache pools so there
is no locking overhead on average. I have posted here ,some time ago,
simplest pool allocator that uses pthread keys, so it is lock free.
I think that this technique is employed in standard allocators, too.

Another major problem is that the standard allocator isn't (and really
can't be) very cache-friendly: As blocks are allocated and deallocated
during the execution of the program, the memory will become more and
more fragmented, and the allocation of subsequent blocks more and more
randomized. This is a cache-killer.

Cache performance does not depends on allocator rather on how
program accesses allocated blocks.

If you want to fix this, you'll

have to find some way of defragmenting and compacting the memory once
in a while, which can be really difficult due to how a typical C/C++
program works.

That is another subject. I don;t think that it helps with a cache at
all.

Juha Nieminen · Aug 14, 2012

Jorgen Grahn said:
Unlikely because if I look at the Linux side of things, plenty of
programmers don't use threads, and intensely dislike threads. Seems
unlikely that they'd really tolerate a significant performance hit in
the C library for such a thing.

Your argument why thread-safety in the standard allocator is unlikely to
be a significant source of inefficiency is because lots of linux
programmers don't like threads?

That's actually one of the most hilarious arguments for *anything* that
I have read in a long time.

Juha Nieminen · Aug 14, 2012

Scott Lurndal said:
Having written many, highly efficient, pool allocators for 64 to 1024-core
systems over the last 20 years, I'm not sure what you're attempting to
get at here. A simple fixed-sized pool allocator delinks the top element
and returns it. The deallocator relinks the element at either the front
or back of the list (each have benefits). In both cases, this is a pair
of memory accesses. Highly efficient.

If the cause for slowness is thread-safety and memory fragmentation, it
doesn't matter how you manage the allocated blocks. Your "O(1)" tells
absolutely nothing.

Even running a compaction sweep from time to time can make the allocator
significantly faster overall (because the sweep removes the randomness of
the free blocks), even though the sweep itself is linear-time. Yes, I have
experimented with this.

The industry is researching many facets of parallel programming, and as processor
counts become larger, formerly efficient algorithms are not scaling, so
those algorithms are being tweaked or supplanted (RCU, for example).

What does that have to do with what I said above?

First, who cares where the memory allocator is going to "jump next". The
interesting issue is when the allocated memory is used, not when it is
allocated. Note that a pool allocator need never touch the memory that
is being allocated (c.f. bitmap allocator), so the only cache issue occurs
when the allocated memory is being touched.

I really don't unserstand what you are saying here. Are you basing your
speed measurements on simply updating the allocation lists but never
touching the allocated memory? No wonder you are not seeing any efficiency
problems with memory fragmentation.

An actual program that uses the allocator is *always* going to immediately
write to the allocated memory block (especially in C++). If that memory
block is not in the cache, it's going to be slow. The CPU cannot magically
know where the next write will happen to and prepare in advance for it.

When using the standard allocator, if you use much more memory that fits in
the caches, and you need to do lots of allocations and deallocations at
random, one of the major sources of inefficiency is precisely the random
distribution of free blocks that ensues. (Obviously if you only use as
much memory as fill fit in the caches, then it will be much more
efficient.) When the list of free blocks is random, there's a high chance
that the next allocated block will not be cached.

You are telling me that the list of free blocks being randomized is actually
a good thing.

Juha Nieminen · Aug 14, 2012

Melzzzzz said:
Cache performance does not depends on allocator rather on how
program accesses allocated blocks.

The speed of the allocator (from the perspective of the CPU caches) comes
into play if the program allocates and deallocates blocks very frequently.

Naturally if the program allocates memory in big bunches and then spends
a lot of time operating on that memory (without doing new allocations or
deallocations) then of course it depends on how the program uses that
memory (ie. how cache-local its operations are).

The thing is, doing a simple 'new' millions of times in a row (eg. to
create a big list or a set) can be horrendously slow. For example,
creating a list of ten million elements can take several seconds
(depending on the system, of course), 99% of that time being spent on
allocation alone, while creating a vector of ten million elements takes
virtually no time at all.

(I have noticed a significant improvement in relative allocation speed
(ie. in % of time spent in allocation as compared to everything else) when
using a 64-bit Linux in a 64-bit Intel system, as compared to a 32-bit
Linux running on a 32-bit system, which is cool. I don't know, however,
if this is caused by the hardware being more efficient or the 64-bit version
of the standard clib allocator being more efficient, or a combination of
both. It can still be annoyingly slow, though, although not that much
anymore.)

The lesson is: If possible, minimize the amount of allocations and
deallocations in your program.

Jorgen Grahn · Aug 14, 2012

Your argument why thread-safety in the standard allocator is unlikely to
be a significant source of inefficiency is because lots of linux
programmers don't like threads?

That's actually one of the most hilarious arguments for *anything* that
I have read in a long time.

You are easily amused. Software features are sometimes driven by user
requirements, are they not?

Anyway, that was just my explanation why I'm interested in your
results.

/Jorgen

I am using 3D map generation with noise, the texture does not match the model.	1	Feb 11, 2026
Chatbot	0	Oct 8, 2024
RSA implementation issues in public key pem loader function	0	May 21, 2025
Universal BMP Steganography Tool (AES-128-CTR + SP800-90A CSPRNG) Full Encoder/Decoder with 3LSB Payload, PasswordDerived Key & External Key File	4	Mar 26, 2026
Code critique please	2	Oct 30, 2004
Mini Web Server in C++ (Part One)	4	Oct 2, 2025
Rich Text Format (RTF) Document Builder in C++: Code and Features	0	Sep 28, 2025
Database Manager: A C++ Console Application	14	May 12, 2025

Is there in memory tree structure faster than std::map?

Melzzzzz

Melzzzzz

Juha Nieminen

Melzzzzz

Juha Nieminen

Juha Nieminen

Jorgen Grahn

Melzzzzz

Juha Nieminen

Juha Nieminen

Juha Nieminen

Jorgen Grahn

Ask a Question

Similar Threads

Members online

Forum statistics

Latest Threads