7
7stud
I'm using groupby() in a very simple way to split up some data,
but when I time it with timeit against another method, it takes twice as
long. The following groupby() code groups the data between the "</tr>" strings:
# Sample input: one run of cell values interleaved with "</tr>" row
# terminators, repeated three times (30 strings in total).
data = [
    "1.5", "</tr>", "2.5", "3.5", "4.5", "</tr>", "</tr>", "5.5", "6.5", "</tr>",
] * 3
import itertools
def key(s):
    """Classify *s* for use as the key function of itertools.groupby().

    Returns 'a' when *s* begins with "<" (a tag such as "</tr>") and 'b'
    for every other string, including the empty string.
    """
    # Slice instead of indexing: s[0] raises IndexError on an empty
    # string, whereas s[:1] is simply "" and falls through to 'b'.
    if s[:1] == "<":
        return 'a'
    return 'b'
def test3():
    """Group the non-tag items of ``data`` with itertools.groupby().

    Each run of consecutive items for which key() returns 'b' is
    collected into its own list; runs of tag items (key 'a') are skipped.

    Returns the list of groups (a list of lists of strings) so the result
    can be inspected; timeit simply ignores the return value.
    """
    master_list = []
    for group_key, group in itertools.groupby(data, key):
        if group_key == "b":
            # group is a lazy iterator sharing the underlying iterable
            # with groupby(); it must be materialised before the next
            # group is requested.
            master_list.append(list(group))
    return master_list
def test1():
    """Group the non-tag items of ``data`` with a hand-written loop.

    Items are accumulated into the current row until an item starting
    with "<" is reached; each non-empty row is then stored as a single
    space-joined string.

    Returns the list of joined rows so the result can be inspected;
    timeit simply ignores the return value.
    """
    master_list = []
    row = []
    for elmt in data:
        # elmt[:1] rather than elmt[0], so an empty string is treated as
        # a value instead of raising IndexError.
        if elmt[:1] != "<":
            row.append(elmt)
        elif row:
            master_list.append(" ".join(row))
            row = []
    # Flush the last row: without this, trailing values that are not
    # followed by a tag would be silently dropped.
    if row:
        master_list.append(" ".join(row))
    return master_list
import timeit
# Time each implementation with timeit's default of one million calls.
# The setup strings import the names from __main__, so this only works
# when the file is run as a script.
t = timeit.Timer("test3()", "from __main__ import test3, key, data")
print(t.timeit())  # parenthesised form runs on both Python 2 and Python 3
t = timeit.Timer("test1()", "from __main__ import test1, data")
print(t.timeit())
--output:---
42.791079998
19.0128788948
I thought groupby() would be faster. Am I doing something wrong?
but when I time it with timeit against another method, it takes twice as
long. The following groupby() code groups the data between the "</tr>" strings:
# Sample input: one run of cell values interleaved with "</tr>" row
# terminators, repeated three times (30 strings in total).
data = [
    "1.5", "</tr>", "2.5", "3.5", "4.5", "</tr>", "</tr>", "5.5", "6.5", "</tr>",
] * 3
import itertools
def key(s):
    """Classify *s* for use as the key function of itertools.groupby().

    Returns 'a' when *s* begins with "<" (a tag such as "</tr>") and 'b'
    for every other string, including the empty string.
    """
    # Slice instead of indexing: s[0] raises IndexError on an empty
    # string, whereas s[:1] is simply "" and falls through to 'b'.
    if s[:1] == "<":
        return 'a'
    return 'b'
def test3():
    """Group the non-tag items of ``data`` with itertools.groupby().

    Each run of consecutive items for which key() returns 'b' is
    collected into its own list; runs of tag items (key 'a') are skipped.

    Returns the list of groups (a list of lists of strings) so the result
    can be inspected; timeit simply ignores the return value.
    """
    master_list = []
    for group_key, group in itertools.groupby(data, key):
        if group_key == "b":
            # group is a lazy iterator sharing the underlying iterable
            # with groupby(); it must be materialised before the next
            # group is requested.
            master_list.append(list(group))
    return master_list
def test1():
    """Group the non-tag items of ``data`` with a hand-written loop.

    Items are accumulated into the current row until an item starting
    with "<" is reached; each non-empty row is then stored as a single
    space-joined string.

    Returns the list of joined rows so the result can be inspected;
    timeit simply ignores the return value.
    """
    master_list = []
    row = []
    for elmt in data:
        # elmt[:1] rather than elmt[0], so an empty string is treated as
        # a value instead of raising IndexError.
        if elmt[:1] != "<":
            row.append(elmt)
        elif row:
            master_list.append(" ".join(row))
            row = []
    # Flush the last row: without this, trailing values that are not
    # followed by a tag would be silently dropped.
    if row:
        master_list.append(" ".join(row))
    return master_list
import timeit
# Time each implementation with timeit's default of one million calls.
# The setup strings import the names from __main__, so this only works
# when the file is run as a script.
t = timeit.Timer("test3()", "from __main__ import test3, key, data")
print(t.timeit())  # parenthesised form runs on both Python 2 and Python 3
t = timeit.Timer("test1()", "from __main__ import test1, data")
print(t.timeit())
--output:---
42.791079998
19.0128788948
I thought groupby() would be faster. Am I doing something wrong?