PATCH to make internal Hash class retain order...

T

Thies C. Arntzen

Hi there -

beeing new to ruby (thru rails) and coming from a strong php background

i really miss beeing able to retain order in a Hash. i know that this
has been discussed in the past i also understand that i can always
write my own OrderedHash implemenation in user-space. -but- as a) i
love to poke around b) userspace was too slow for some of my problems
-and- c) it's always best to to let "code speak" i decided to "scratch
my itch" and wrote a patch (against 1.8.4) that will:

* remember the order in which elements have been added to a Hash (at
the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
per hash-table)
* use this order for iterating (st_foreach will use this order)
* implement a call Hash.order! allows the user to change the internal
order;-)

y = { 'd' => '4', 'a' => '1', 'b' => '2', 'c' => '3' }
puts y.inspect

y.order!
puts y.inspect

y.order! {|a,b| b[1] <=> a[1]}
puts y.inspect

will now get you:
{"d"=>"4", "a"=>"1", "b"=>"2", "c"=>"3"}
{"a"=>"1", "b"=>"2", "c"=>"3", "d"=>"4"}
{"d"=>"4", "c"=>"3", "b"=>"2", "a"=>"1"}


please bear with me as i'm not fully used to all the conventions you
use in the code - also this has only seen minimal testing...

suggestions welcome!

regards, tc

--- snipp --

diff -ru ruby-1.8.4/hash.c ruby-thies/hash.c
--- ruby-1.8.4/hash.c 2005-07-19 10:25:37.000000000 +0200
+++ ruby-thies/hash.c 2006-08-12 17:28:56.000000000 +0200
@@ -1195,6 +1195,46 @@
return entries;
}

+/*
+ * call-seq:
+ * hsh.order! => hash
+ * hsh.order! {|a,b| block } => hash
+ *
+ * Orders <i>hsh</i> according to code <code>[</code> <i>key,
+ * value</i> <code>]</code>. If no block is given the sort order
+ * defaults to the keyorder.
+ *
+ * h = { "b" => 30, "c" => 10, "a" => 20 }
+ * h.order! #=> { "c" => 10, "a" => 20, "b"
=> 30 }
+ * h.order! {|a,b| a[0]<=>b[0]} #=> { "a" => 20, "b" => 30, "c"
=> 10 }
+ *
+ */
+
+static VALUE
+rb_hash_order(hash)
+ VALUE hash;
+{
+ VALUE *keyorder;
+ VALUE entries = rb_hash_to_a(hash);
+ int i;
+
+ rb_ary_sort_bang(entries);
+
+ keyorder = ALLOC_N(VALUE, RARRAY(entries)->len);
+ if (! keyorder) {
+ return 0;
+ }
+
+ for (i = 0; i < RARRAY(entries)->len; i++) {
+ keyorder = RARRAY(RARRAY(entries)->ptr)->ptr[0];
+ }
+
+ st_reorder(RHASH(hash)->tbl, keyorder);
+ free(keyorder);
+
+ return hash;
+}
+
static int
inspect_i(key, value, str)
VALUE key, value, str;
@@ -2449,6 +2489,7 @@
rb_define_method(rb_cHash,"each_key", rb_hash_each_key, 0);
rb_define_method(rb_cHash,"each_pair", rb_hash_each_pair, 0);
rb_define_method(rb_cHash,"sort", rb_hash_sort, 0);
+ rb_define_method(rb_cHash,"order!", rb_hash_order, 0);

rb_define_method(rb_cHash,"keys", rb_hash_keys, 0);
rb_define_method(rb_cHash,"values", rb_hash_values, 0);
diff -ru ruby-1.8.4/rubytest.rb ruby-thies/rubytest.rb
--- ruby-1.8.4/rubytest.rb 2005-02-06 18:03:32.000000000 +0100
+++ ruby-thies/rubytest.rb 2006-08-12 15:04:15.000000000 +0200
@@ -42,6 +42,7 @@
print "test succeeded\n"
exit 0
end
+ puts line
error << line if line =~ %r:^(sample/test.rb|not):
end
print error
Only in ruby-thies: signal.o
Only in ruby-thies: sprintf.o
diff -ru ruby-1.8.4/st.c ruby-thies/st.c
--- ruby-1.8.4/st.c 2005-12-20 05:13:21.000000000 +0100
+++ ruby-thies/st.c 2006-08-12 18:05:55.000000000 +0200
@@ -13,11 +13,25 @@

typedef struct st_table_entry st_table_entry;

+#define ST_APPEND_TO_GLOBAL_ORDER(tbl, entry)\
+ (entry)->gnext = NULL;\
+ (entry)->gprev = (tbl)->tail;\
+ if ((entry)->gprev) {\
+ (entry)->gprev->gnext = (entry);\
+ }\
+ (tbl)->tail = (entry);\
+ if (! (tbl)->head) {\
+ (tbl)->head = (entry);\
+ }
+
+
struct st_table_entry {
unsigned int hash;
st_data_t key;
st_data_t record;
st_table_entry *next;
+ st_table_entry *gprev;
+ st_table_entry *gnext;
};

#define ST_DEFAULT_MAX_DENSITY 5
@@ -135,6 +149,52 @@
}
#endif

+static int
+st_unlink_entry(table, entry)
+ st_table *table;
+ st_table_entry *entry;
+{
+ int i;
+ st_table_entry *last, *ptr;
+
+ for (i = 0; i < table->num_bins; i++) {
+ last = NULL;
+ ptr = table->bins;
+ while (ptr) {
+ if (ptr == entry) break; /* found */
+ last = ptr;
+ ptr = ptr->next;
+ }
+ if (! ptr) continue; /* correct bin it yet to be found */
+
+ if (last) {
+ last->next = ptr->next;
+ } else {
+ table->bins = ptr->next;
+ }
+
+ if (table->head == ptr) {
+ table->head = ptr->gnext;
+ }
+ if (table->tail == ptr) {
+ table->tail = ptr->gprev;
+ }
+
+ if (ptr->gnext) {
+ ptr->gnext->gprev = ptr->gprev;
+ }
+
+ if (ptr->gprev) {
+ ptr->gprev->gnext = ptr->gnext;
+ }
+
+ table->num_entries--;
+ return 1;
+ }
+ return 0;
+}
+
+
st_table*
st_init_table_with_size(type, size)
struct st_hash_type *type;
@@ -157,6 +217,8 @@
tbl->num_bins = size;
tbl->bins = (st_table_entry **)Calloc(size,
sizeof(st_table_entry*));

+ tbl->head = tbl->tail = NULL;
+
return tbl;
}

@@ -269,6 +331,7 @@
entry->record = value;\
entry->next = table->bins[bin_pos];\
table->bins[bin_pos] = entry;\
+ ST_APPEND_TO_GLOBAL_ORDER(table, entry)\
table->num_entries++;\
} while (0)

@@ -339,39 +402,74 @@
{
st_table *new_table;
st_table_entry *ptr, *entry;
- int i, num_bins = old_table->num_bins;
+ int num_bins = old_table->num_bins;
+ unsigned int bin_pos;

- new_table = alloc(st_table);
+ new_table = st_init_table_with_size(old_table->type,
old_table->num_bins);
if (new_table == 0) {
return 0;
}

- *new_table = *old_table;
- new_table->bins = (st_table_entry**)
- Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+ ptr = old_table->head;
+ while (ptr) {
+ entry = alloc(st_table_entry);
+ if (! entry) {
+ st_free_table(new_table); /* safe as new_table is consistent */
+ return 0;
+ }
+ *entry = *ptr;
+ bin_pos = entry->hash % num_bins;
+ entry->next = new_table->bins[bin_pos];
+ new_table->bins[bin_pos] = entry;
+ ST_APPEND_TO_GLOBAL_ORDER(new_table, entry);
+ new_table->num_entries++;

- if (new_table->bins == 0) {
- free(new_table);
- return 0;
+ ptr = ptr->gnext;
}

- for(i = 0; i < num_bins; i++) {
- new_table->bins = 0;
- ptr = old_table->bins;
- while (ptr != 0) {
- entry = alloc(st_table_entry);
- if (entry == 0) {
- free(new_table->bins);
- free(new_table);
- return 0;
- }
- *entry = *ptr;
- entry->next = new_table->bins;
- new_table->bins = entry;
- ptr = ptr->next;
+ return new_table;
+}
+
+void
+st_reorder(table, keylist)
+ st_table *table;
+ st_data_t *keylist;
+{
+ st_table buf_table, *org_table;
+ st_table_entry *entry;
+ st_data_t key;
+ unsigned int i, hash_val, bin_pos;
+
+ /*
+ * "algorithmus":
+ * 1 remeber old table content
+ * 2 reinitialize old_table to be empty
+ * 3 refill old table in corrent order (supplied by keylist) using

the old content
+ */
+
+ buf_table = *table;
+ org_table = &buf_table;
+
+ table->num_entries = 0;
+ table->bins = (st_table_entry **)Calloc(table->num_bins,
sizeof(st_table_entry*));
+ table->head = table->tail = NULL;
+
+ for (i = 0; i < org_table->num_entries; i++) {
+ key = keylist;
+ hash_val = do_hash(key, org_table);
+ FIND_ENTRY(org_table, entry, hash_val, bin_pos);
+ if (! entry) {
+ /* should not happen */
+ printf("uoooo...\n");
}
+
+ entry->next = table->bins[bin_pos];
+ table->bins[bin_pos] = entry;
+ ST_APPEND_TO_GLOBAL_ORDER(table, entry);
+ table->num_entries++;
}
- return new_table;
+
+ free(org_table->bins);
}

int
@@ -381,39 +479,26 @@
st_data_t *value;
{
unsigned int hash_val;
- st_table_entry *tmp;
register st_table_entry *ptr;

hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];

- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
+ while (ptr) {
+ if (EQUAL(table, ptr->key, *key)) break;
+ ptr = ptr->next;
}

- if (EQUAL(table, *key, ptr->key)) {
- table->bins[hash_val] = ptr->next;
- table->num_entries--;
+ if (ptr) {
+ st_unlink_entry(table, ptr);
if (value != 0) *value = ptr->record;
*key = ptr->key;
free(ptr);
return 1;
+ } else {
+ if (value != 0) *value = 0;
+ return 0;
}
-
- for(; ptr->next != 0; ptr = ptr->next) {
- if (EQUAL(table, ptr->next->key, *key)) {
- tmp = ptr->next;
- ptr->next = ptr->next->next;
- table->num_entries--;
- if (value != 0) *value = tmp->record;
- *key = tmp->key;
- free(tmp);
- return 1;
- }
- }
-
- return 0;
}

int
@@ -429,22 +514,21 @@
hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];

- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
+ while (ptr) {
+ if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) break;
+ ptr = ptr->next;
}

- for(; ptr != 0; ptr = ptr->next) {
- if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
- table->num_entries--;
- *key = ptr->key;
- if (value != 0) *value = ptr->record;
- ptr->key = ptr->record = never;
- return 1;
- }
+ if (ptr) {
+ table->num_entries--;
+ if (value != 0) *value = ptr->record;
+ *key = ptr->key;
+ free(ptr);
+ return 1;
+ } else {
+ if (value != 0) *value = 0;
+ return 0;
}
-
- return 0;
}

static int
@@ -472,46 +556,40 @@
int (*func)();
st_data_t arg;
{
- st_table_entry *ptr, *last, *tmp;
+ st_table_entry *ptr, *tmp;
enum st_retval retval;
- int i;
-
- for(i = 0; i < table->num_bins; i++) {
- last = 0;
- for(ptr = table->bins; ptr != 0;) {
- retval = (*func)(ptr->key, ptr->record, arg);
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- tmp = 0;
- if (i < table->num_bins) {
- for (tmp = table->bins; tmp; tmp=tmp->next) {
- if (tmp == ptr) break;
- }
- }
- if (!tmp) {
- /* call func with error notice */
- return 1;
- }
- /* fall through */
- case ST_CONTINUE:
- last = ptr;
- ptr = ptr->next;
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- tmp = ptr;
- if (last == 0) {
- table->bins = ptr->next;
- }
- else {
- last->next = ptr->next;
- }
- ptr = ptr->next;
- free(tmp);
- table->num_entries--;
- }
- }
+
+ ptr = table->head;
+ while (ptr) {
+ retval = (*func)(ptr->key, ptr->record, arg);
+ switch (retval) {
+ case ST_CHECK: /* check if hash is modified during iteration
*/
+ tmp = table->head;
+ while (tmp) {
+ if (tmp == ptr) break; /* current element is still
there - good */
+ tmp = tmp->gnext;
+ }
+ if (! tmp) {
+ /* call func with error notice */
+ return 1;
+ }
+ /* fall through */
+ case ST_CONTINUE:
+ ptr = ptr->gnext;
+ break;
+ case ST_STOP:
+ return 0;
+ case ST_DELETE:
+ tmp = ptr;
+ ptr = ptr->gnext;
+ st_unlink_entry(table, tmp);
+ free(tmp);
+ break;
+ default:
+ /* should never happen! */
+ printf("uhh? retval=%d\n", retval);
+ exit(-10);
+ }
}
return 0;
}
diff -ru ruby-1.8.4/st.h ruby-thies/st.h
--- ruby-1.8.4/st.h 2005-02-08 01:50:33.000000000 +0100
+++ ruby-thies/st.h 2006-08-12 17:51:50.000000000 +0200
@@ -21,6 +21,8 @@
int num_bins;
int num_entries;
struct st_table_entry **bins;
+ struct st_table_entry *head;
+ struct st_table_entry *tail;
};

#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
@@ -53,6 +55,7 @@
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
+void st_reorder _((st_table *, st_data_t *));

#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
 
T

ts

T> i really miss beeing able to retain order in a Hash. i know that this
T> has been discussed in the past i also understand that i can always
T> write my own OrderedHash implemenation in user-space. -but- as a) i
T> love to poke around b) userspace was too slow for some of my problems
T> -and- c) it's always best to to let "code speak" i decided to "scratch
T> my itch" and wrote a patch (against 1.8.4) that will:

Why you don't first write a C extension, rather than modifying ruby ?
 
B

Brian McCallister

Hi there -

beeing new to ruby (thru rails) and coming from a strong php
background

* remember the order in which elements have been added to a Hash (at
the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
per hash-table)
* use this order for iterating (st_foreach will use this order)
* implement a call Hash.order! allows the user to change the internal
order;-)

Nice :) The better place for discussion hacking on the ruby
interpreter itself is probably ruby-core.

How difficult would it be to do an OrderedHash data structure as a C
extension instead of in Ruby?

-Brian
 
T

Thies C. Arntzen

Brian said:
Nice :) The better place for discussion hacking on the ruby
interpreter itself is probably ruby-core.

i wasn't aware of that list;-) subscribed...
How difficult would it be to do an OrderedHash data structure as a C
extension instead of in Ruby?

i really believe that it needs to be a 1st class citizen - the reason
it's done in C is because userland imposes too much overhead (i
believe) and converting forward<->back to the normal Hash would be
slow, memory intensive and ordering information would get lost on the
way...

also the amount of code added to the Hash class is ~20 lines of C-code
- this code adds zero overhead. the bigger chunk of the code was added
to the st.c which is the underlying library for handling hash-tables -
and duplicating > 600 lines of code to add ~60 lines didn't seem very
cool to me;-)

best, thies
 
D

dblack

Hi --

i really believe that it needs to be a 1st class citizen - the reason
it's done in C is because userland imposes too much overhead (i
believe) and converting forward<->back to the normal Hash would be
slow, memory intensive and ordering information would get lost on the
way...

also the amount of code added to the Hash class is ~20 lines of C-code
- this code adds zero overhead. the bigger chunk of the code was added
to the st.c which is the underlying library for handling hash-tables -
and duplicating > 600 lines of code to add ~60 lines didn't seem very
cool to me;-)

The problem you likely face, though, is that no one will use it (which
may or may not matter to you :) You're requiring that people use a
patched Ruby, and if anyone writes an application that uses it, then
they require that everyone who runs the application use a patched
Ruby, and so on.

It's much more likely -- probably literally thousands of times more
likely -- that people will try out something distributed as an
extension than that they'll patch the Ruby source. And C extensions
are very fast, once loaded.


David

--
http://www.rubypowerandlight.com => Ruby/Rails training & consultancy
----> SEE SPECIAL DEAL FOR RUBY/RAILS USERS GROUPS! <-----
http://dablog.rubypal.com => D[avid ]A[. ]B[lack's][ Web]log
http://www.manning.com/black => book, Ruby for Rails
http://www.rubycentral.org => Ruby Central, Inc.
 
R

Robert Klemme

Thies said:
i wasn't aware of that list;-) subscribed...


i really believe that it needs to be a 1st class citizen, - the reason
it's done in C is because userland imposes too much overhead (i
believe) and converting forward<->back to the normal Hash would be
slow, memory intensive and ordering information would get lost on the
way...

also the amount of code added to the Hash class is ~20 lines of C-code
- this code adds zero overhead. the bigger chunk of the code was added
to the st.c which is the underlying library for handling hash-tables -
and duplicating > 600 lines of code to add ~60 lines didn't seem very
cool to me;-)

Adding to David's comments: you're changing semantics of one of the core
classes which is in itself a dangerous thing. Plus, you increase the
memory footprint of a hash which is IMHO a very bad idea for such a
widely used data structure (and some of these hashes grow huge). So, if
you want to patch ruby core, make it at least a subclass of Hash so
people can choose.

Then of course there's a whole bunch of other things that one should
consider. Is the name properly chosen? Do we have to provide a means
for the user to *define* the standard order on creation? This is e.g.
the way Java's TreeSet works - and IMHO it's superior vs. having to call
order! manually. etc.

Kind regards

robert
 
T

Trans

Robert said:
Adding to David's comments: you're changing semantics of one of the core
classes which is in itself a dangerous thing. Plus, you increase the
memory footprint of a hash which is IMHO a very bad idea for such a
widely used data structure (and some of these hashes grow huge). So, if
you want to patch ruby core, make it at least a subclass of Hash so
people can choose.

Then of course there's a whole bunch of other things that one should
consider. Is the name properly chosen? Do we have to provide a means
for the user to *define* the standard order on creation? This is e.g.
the way Java's TreeSet works - and IMHO it's superior vs. having to call
order! manually. etc.

All good points. I still think it would be nice if such a class were
built-in to Ruby with literal:

[ :a => 1, :b => 2 ]

Maybe as a red-black tree class.

T.
 
H

Hal Fulton

ts said:
T> i really miss beeing able to retain order in a Hash. i know that this
T> has been discussed in the past i also understand that i can always
T> write my own OrderedHash implemenation in user-space. -but- as a) i
T> love to poke around b) userspace was too slow for some of my problems
T> -and- c) it's always best to to let "code speak" i decided to "scratch
T> my itch" and wrote a patch (against 1.8.4) that will:

Why you don't first write a C extension, rather than modifying ruby ?

Interesting question... do you mean a C extension implementing a
different Hash class or what?


Hal
 
H

Hal Fulton

Brian said:
Nice :) The better place for discussion hacking on the ruby
interpreter itself is probably ruby-core.

How difficult would it be to do an OrderedHash data structure as a C
extension instead of in Ruby?

Well, if it were another class, then you would lose the order
information before you ever got into that class, right?

hash = {a=>1, b=>2, c=>3}

ordered = OrderedHash.new(hash) # Oops -- too late


Hal
 
H

Hal Fulton

The problem you likely face, though, is that no one will use it (which
may or may not matter to you :) You're requiring that people use a
patched Ruby, and if anyone writes an application that uses it, then
they require that everyone who runs the application use a patched
Ruby, and so on.

It's much more likely -- probably literally thousands of times more
likely -- that people will try out something distributed as an
extension than that they'll patch the Ruby source. And C extensions
are very fast, once loaded.

My impression was that this was for play and for testing
prior to possibly being folded into Ruby.

Also, I don't yet see how this could be implemented as a
C extension. See my previous email.


Hal
 
D

dblack

Hi --

My impression was that this was for play and for testing
prior to possibly being folded into Ruby.

Like I said -- he may not care whether anyone uses it (in this phase
at least).
Also, I don't yet see how this could be implemented as a
C extension. See my previous email.

Could it be implemented as an extension that redefines Hash?


David

--
http://www.rubypowerandlight.com => Ruby/Rails training & consultancy
----> SEE SPECIAL DEAL FOR RUBY/RAILS USERS GROUPS! <-----
http://dablog.rubypal.com => D[avid ]A[. ]B[lack's][ Web]log
http://www.manning.com/black => book, Ruby for Rails
http://www.rubycentral.org => Ruby Central, Inc.
 
H

Hal Fulton

Could it be implemented as an extension that redefines Hash?

That's a very interesting question. I'd like to think so.

I'd assume that the *parser* at least retains the oreder
in which it sees the elements of the literal (long enough
to pass them to the "guts" of Hash).


Hal
 
M

Mauricio Fernandez

That's a very interesting question. I'd like to think so.

I'd assume that the *parser* at least retains the oreder
in which it sees the elements of the literal (long enough
to pass them to the "guts" of Hash).

I don't think this is easily implementable as an extension without patching
the interpreter.

The order of the associations in the literal is indeed preserved, since
they're put in a linked list inside the NODE_HASH. However, the latter is
evaluated in rb_eval, so the OP would need to patch that, using at least
something like

(untested)

--- eval.c.orig 2006-08-13 02:28:59.000000000 +0200
+++ eval.c 2006-08-13 02:31:39.000000000 +0200
@@ -3750,22 +3750,23 @@
break;

case NODE_HASH:
{
NODE *list;
- VALUE hash = rb_hash_new();
+ VALUE hash = rb_funcall(rb_const_get_at(rb_cObject, rb_intern("Hash")),
+ rb_intern("new"), 0, 0);
VALUE key, val;

list = node->nd_head;
while (list) {
key = rb_eval(self, list->nd_head);
list = list->nd_next;
if (list == 0)
rb_bug("odd number list for Hash");
val = rb_eval(self, list->nd_head);
list = list->nd_next;
- rb_hash_aset(hash, key, val);
+ rb_funcall(hash, rb_intern("[]="), 2, key, val);
}
result = hash;
}
break;


in order to turn literals into ordered hashes if Hash is replaced.

Even if you cache the IDs, the speed impact will probably be significant;
it's too late to time this now.
 
R

Rick DeNatale

Well, if it were another class, then you would lose the order
information before you ever got into that class, right?

hash = {a=>1, b=>2, c=>3}

ordered = OrderedHash.new(hash) # Oops -- too late

The case of initializing with a hash is problematical, but

OrderedHash[a, 1, b, 2, c, 3]

could work if you want control the order on initialization.

And OrderedHash.new(hash) or
OrderedHash[hash]

could be defined to set the order to hash.keys
 
M

Matt Todd

This is a very interesting hack, though I don't really think it is a
good one to implement. Maybe as an extension (which would be really
cool, though I have a hard time seeing that working), but not as
default.

I believe that I've heard that one of the reasons why it's not sorted
any particular way is due to a performance increase in the randomness
of it. I can't verify that, as that's outside of my expertise, but I
don't wish to have Ruby any slower.

But, really, my biggest reasoning is that order doesn't matter to a
dictionary/hash. What matters, above all else, is that you have a
collection of keys that refer to their associated values. Sure, some
kind of ordering would be good for some occasions, but, honestly, I
don't need it often, if ever. When I need some kind of ordered set, I
use an Array. I ask myself: if I really need this ordered, am I really
working with a hash/dictionary, or am I working with an array?

Lastly, though I can't see using it (any time soon, at least), I do
understand that it has valid uses. I don't see Rick's example syntax
to be too far-fetched an alternative to simply using a hash literal
(which I don't use too often, anyways), as uncomfortable as it may be.
But, then again, it's an Array, isn't it? Hmm. But, really, I'm sure
you could figure something out that works well.

Just don't mess with my hash... ;D

M.T.
 
H

Hal Fulton

Matt said:
This is a very interesting hack, though I don't really think it is a
good one to implement. Maybe as an extension (which would be really
cool, though I have a hard time seeing that working), but not as
default.

I tentatively disagree...
I believe that I've heard that one of the reasons why it's not sorted
any particular way is due to a performance increase in the randomness
of it. I can't verify that, as that's outside of my expertise, but I
don't wish to have Ruby any slower.

Not really, as I understand it. The reason the order gets scrambled
is because it's implemented internally as a "true" hash (as in
"hashing function").

I believe that Nobu once made a patch like this and (as I recall)
reported no significant speed hit. (Some small memory hit, of course.)

Nobu? What can you tell us?
But, really, my biggest reasoning is that order doesn't matter to a
dictionary/hash. What matters, above all else, is that you have a
collection of keys that refer to their associated values. Sure, some
kind of ordering would be good for some occasions, but, honestly, I
don't need it often, if ever. When I need some kind of ordered set, I
use an Array. I ask myself: if I really need this ordered, am I really
working with a hash/dictionary, or am I working with an array?

You have a good point. But the thing is, the Hash class has *two*
things I like (two things that make me like hashes). It consists of
*pairs* of items (associations), not just single items. (Of course, you
could view an array as associating integers with objects.) Also the
keys don't have to be integers. And thirdly, it has a convenient literal
notation (as arrays do also).
Lastly, though I can't see using it (any time soon, at least), I do
understand that it has valid uses. I don't see Rick's example syntax
to be too far-fetched an alternative to simply using a hash literal
(which I don't use too often, anyways), as uncomfortable as it may be.
But, then again, it's an Array, isn't it? Hmm. But, really, I'm sure
you could figure something out that works well.

Just don't mess with my hash... ;D

I once had the idea of a separate OrderedHash class with a literal
notation that Ruby would recognize:

unord = {a=>1, b=>2, c=>3}
ord = [a=>1, b=>2, c=>3]

It even looks array-like. Trouble is, Ruby already recognizes such a
notation as being an array with a single element which is a hash.

I am more likely to ask for changes outside the parser than inside it.


Hal
 
T

Thies C. Arntzen

Thies said:
Hi there -

beeing new to ruby (thru rails) and coming from a strong php background

i really miss beeing able to retain order in a Hash. i know that this
has been discussed in the past i also understand that i can always
write my own OrderedHash implemenation in user-space. -but- as a) i
love to poke around b) userspace was too slow for some of my problems
-and- c) it's always best to to let "code speak" i decided to "scratch
my itch" and wrote a patch (against 1.8.4) that will:

hi back - replying to myself...

* it cannot be implemented "as-cool" in a C-Extension cause it would
not integrate "far" enough into ruby to have a real substantial value
IMHO. (using Mauricios patch to eval would alow to implement it fully
as an extension, but it would *slow* down *any* Hash creation in to
engine - so it's not an option)
* it was not meant as a patch to ruby for people to use - i would much
rather like to have this feature a default in upcoming versions of
ruby. so, i posted it for discussion...
* sure, changing the core-classes is a delicate thing, but -heck- why
not? a) improvment most often comes with change, and b) i have have
hacked on laguages-cores (PHP) before;-)
* i would not go as far as proposing to possibility to supply an
order-function to the hash so that it's always ordered in a
user-defined way - if you have to go that far you are better off in
user-space!
* using rb-trees has to be less efficient for simple lookups (O(?)
against O(1))


regarding possible memory-usage and speed tradeoffs:
* the memory footprint increase is tiny and we should be able to ignore
that. rubys implemenation is way more memory efficient that eg PHPs and
noone has complaint there yet.
* the same goes for speed. the lookup-speed is untoched, update is
untouched, insert is O(1) slower (a few more pointers), deletion is a
tad slower (but that could be fixed). sidenote: i even think we could
squueze a tiny bit more out of our current hash-table implementation.
i don't believe that you will be able to find any benchmark to prove
this to slow down or bloat ruby in any way...

re, tc
 
M

Matt Todd

Wow, you worked on PHP? You brave soul! :) I've made some suggestions
to that crowd, but they are very stone-cold and concretely resolute in
staying with how things are. They move slow: a whole version just for
something that should've been disabled long ago (register_globals) and
Unicode support (yay).

But, back to the topic, I'm not a fan of increasing size, even if
we're talking 'neglible' amounts. 2n+2 is a large increase, relatively
speaking.

To me, though, for something that is not needed for the majority of
the uses of a normal hash, I'd take the speed and memory over the
ordering capability. But, then again, that's just me.

M.T.
 
M

Mauricio Fernandez

* it cannot be implemented "as-cool" in a C-Extension cause it would
not integrate "far" enough into ruby to have a real substantial value
IMHO. (using Mauricios patch to eval would alow to implement it fully
as an extension, but it would *slow* down *any* Hash creation in to

Not Hash.new ;)
engine - so it's not an option)

Right. Here are some times just for fun; the overhead is around 50%:

$ cat $MESS/time-hash.rb

def time(exec, code = "1000000.times{ {1,1,1,1,1,1} }")
`/usr/bin/time #{exec} -e "#{code}" 2>&1 `[/(\d\.\d+)user/, 1].to_f
end

RUNS = 5
base = time("./ruby", "1000000.times{}")
a = (0...RUNS).map{ time("./ruby.orig") - base }
b = (0...RUNS).map{ time("./ruby") - base}
ma = a.inject{|s,x| s+x}/a.size
mb = b.inject{|s,x| s+x}/b.size
sa = Math.sqrt(a.inject(0){|s,x| s+x**2}/a.size - ma**2)
sb = Math.sqrt(b.inject(0){|s,x| s+x**2}/b.size - mb**2)
puts "orig %4.2f <%4.2f>" % [ma, sa]
puts "patched %4.2f <%4.2f> (#{(100*mb/ma).round}%%)" % [mb, sb]

$ ruby $MESS/time-hash.rb
orig 2.11 <0.01>
* it was not meant as a patch to ruby for people to use - i would much
rather like to have this feature a default in upcoming versions of
ruby. so, i posted it for discussion...

Maybe this thread should be moved to ruby-core.
* sure, changing the core-classes is a delicate thing, but -heck- why
not? a) improvment most often comes with change, and b) i have have
hacked on laguages-cores (PHP) before;-)
===
You could have kept that to yourself };-)
regarding possible memory-usage and speed tradeoffs:
* the memory footprint increase is tiny and we should be able to ignore
that. rubys implemenation is way more memory efficient that eg PHPs and
noone has complaint there yet.
* the same goes for speed. the lookup-speed is untoched, update is
untouched, insert is O(1) slower (a few more pointers), deletion is a
tad slower (but that could be fixed). sidenote: i even think we could
squueze a tiny bit more out of our current hash-table implementation.
i don't believe that you will be able to find any benchmark to prove
this to slow down or bloat ruby in any way...

Your patch is similar to
http://www.rubyist.net/~nobu/ruby/ordered-hash.diff
which was not applied due to concerns about the memory overhead...
 
D

Daniel Schierbeck

This is a question that has been raised during an earlier discussion of
this issue: What should this expression return?

{:foo => 1, :bar => 2} == {:bar => 2, :foo => 1}

I don't think there's any way around it returning `true', but that would
decrease the usefulness of ordered hashes. One solution would be that
Hash#== returns true in such cases while Hash#eql? returns false, just as

2 == 2.0 => true
2.eql? 2.0 => false


Cheers,
Daniel
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,744
Messages
2,569,479
Members
44,899
Latest member
RodneyMcAu

Latest Threads

Top