F
F. Petitjean
I have written a script to find the modules which export the largest
number of names. The gc.get_referrers(*objs) function also gives an idea
of the dependencies between the modules.
The code (statsmod.py) :
#!/usr/bin/env python
# -*- coding: latin-1 -*-
"""
statsmod.py module rudimentaire de statistiques des noms exportés
par les modules de la bibliothèque standard
"""
import sys
import gc
from glob import glob
import os, os.path
from os.path import basename
def browse_stdlib():
    """Return the names of the top-level ``*.py`` modules of the standard library.

    The directory scanned is ``<sys.prefix>/lib/pythonX.Y``; when
    ``<sys.prefix>/Lib/os.pyc`` exists, ``<sys.prefix>/Lib`` is scanned
    instead (presumably the layout of a Windows installation).

    Returns a list of module names (file names without the ``.py``
    suffix), minus a few obsolete modules that must not be imported.
    """
    # Build "pythonX.Y" from version_info: sys.version[:3] would give
    # "3.1" for a two-digit minor version such as 3.10.
    pyver = 'python%d.%d' % sys.version_info[:2]
    pyglob = os.path.join(sys.prefix, 'lib', pyver, '*.py')
    if os.path.exists(os.path.join(sys.prefix, 'Lib', 'os.pyc')):
        pyglob = os.path.join(sys.prefix, 'Lib', '*.py')
    names = [basename(path)[:-3] for path in glob(pyglob)]
    # remove some obsolete modules ('this' + DeprecationWarning)
    for dontparse in ("this", "tzparse", 'FCNTL', 'posixfile', 'pre', 'regsub',
                      'statcache', 'TERMIOS', 'xmllib'):
        try:
            names.remove(dontparse)
        except ValueError:
            # not present in this installation: nothing to remove
            continue
    return names
def exports(names, with_modules=False):
    """Import every module listed in *names* and count the names it exports.

    For each module that imports successfully a tuple
    ``(nexports, ldir, name)`` is recorded, where ``ldir`` is
    ``len(dir(module))`` and ``nexports`` is ``len(module.__all__)`` when
    the module defines ``__all__`` and ``ldir`` otherwise.  Progress and
    import failures are written to standard output.

    Returns a 2-tuple ``(res, modules)``:
    - res: the list of ``(nexports, ldir, name)`` tuples
    - modules: the imported module objects, in the same order as res,
      when *with_modules* is true; an empty list otherwise
    """
    res = []
    modules = []
    for name in names:
        # progress indicator, deliberately written without a newline
        sys.stdout.write("%s   " % (name,))
        try:
            module = __import__(name, globals(), locals(), [])
        except ImportError:
            # sys.exc_info() is spelled the same on every Python version
            print("cannot import module %s %s" % (name, sys.exc_info()[1]))
            continue
        # __import__('a.b') returns the top-level package 'a'; the module
        # actually requested is the one registered under its full name.
        module = sys.modules.get(name, module)
        ldir = len(dir(module))
        if hasattr(module, '__all__'):
            nexports = len(module.__all__)
        else:
            nexports = ldir
        res.append((nexports, ldir, name))
        if with_modules:
            modules.append(module)
    return res, modules
def pm_histo(values, nbins=20):
"""a poor man histogram
Return a list of nbins tuples (left, right) such that
the union of the consecutive ranges(left, right) is range(len(values)+1)
values[k]
"""
vlo, vhi = values[0], values[-1]+1
nbins = min(nbins, vhi-vlo)
deltax = int((vhi - vlo)/nbins)
assert deltax > 0
ranges = []
add = ranges.append
left = 0 # left index first bin
val = vlo + deltax
while val < vhi:
for right in range(left, len(values)):
if values
number of names. The gc.get_referrers(*objs) function also gives an idea
of the dependencies between the modules.
The code (statsmod.py) :
#!/usr/bin/env python
# -*- coding: latin-1 -*-
"""
statsmod.py module rudimentaire de statistiques des noms exportés
par les modules de la bibliothèque standard
"""
import sys
import gc
from glob import glob
import os, os.path
from os.path import basename
def browse_stdlib():
    """Return the names of the top-level ``*.py`` modules of the standard library.

    The directory scanned is ``<sys.prefix>/lib/pythonX.Y``; when
    ``<sys.prefix>/Lib/os.pyc`` exists, ``<sys.prefix>/Lib`` is scanned
    instead (presumably the layout of a Windows installation).

    Returns a list of module names (file names without the ``.py``
    suffix), minus a few obsolete modules that must not be imported.
    """
    # Build "pythonX.Y" from version_info: sys.version[:3] would give
    # "3.1" for a two-digit minor version such as 3.10.
    pyver = 'python%d.%d' % sys.version_info[:2]
    pyglob = os.path.join(sys.prefix, 'lib', pyver, '*.py')
    if os.path.exists(os.path.join(sys.prefix, 'Lib', 'os.pyc')):
        pyglob = os.path.join(sys.prefix, 'Lib', '*.py')
    names = [basename(path)[:-3] for path in glob(pyglob)]
    # remove some obsolete modules ('this' + DeprecationWarning)
    for dontparse in ("this", "tzparse", 'FCNTL', 'posixfile', 'pre', 'regsub',
                      'statcache', 'TERMIOS', 'xmllib'):
        try:
            names.remove(dontparse)
        except ValueError:
            # not present in this installation: nothing to remove
            continue
    return names
def exports(names, with_modules=False):
    """Import every module listed in *names* and count the names it exports.

    For each module that imports successfully a tuple
    ``(nexports, ldir, name)`` is recorded, where ``ldir`` is
    ``len(dir(module))`` and ``nexports`` is ``len(module.__all__)`` when
    the module defines ``__all__`` and ``ldir`` otherwise.  Progress and
    import failures are written to standard output.

    Returns a 2-tuple ``(res, modules)``:
    - res: the list of ``(nexports, ldir, name)`` tuples
    - modules: the imported module objects, in the same order as res,
      when *with_modules* is true; an empty list otherwise
    """
    res = []
    modules = []
    for name in names:
        # progress indicator, deliberately written without a newline
        sys.stdout.write("%s   " % (name,))
        try:
            module = __import__(name, globals(), locals(), [])
        except ImportError:
            # sys.exc_info() is spelled the same on every Python version
            print("cannot import module %s %s" % (name, sys.exc_info()[1]))
            continue
        # __import__('a.b') returns the top-level package 'a'; the module
        # actually requested is the one registered under its full name.
        module = sys.modules.get(name, module)
        ldir = len(dir(module))
        if hasattr(module, '__all__'):
            nexports = len(module.__all__)
        else:
            nexports = ldir
        res.append((nexports, ldir, name))
        if with_modules:
            modules.append(module)
    return res, modules
def pm_histo(values, nbins=20):
    """A poor man's histogram over a sorted sequence of numbers.

    *values* must be sorted in ascending order.  The thresholds are
    ``values[0] + k*deltax`` for k = 1, 2, ... while below
    ``values[-1] + 1``, with ``deltax = (values[-1] + 1 - values[0]) // nbins``
    (after *nbins* has been capped to the value span).  Each bin holds
    the values not greater than its threshold that no earlier bin took;
    a final bin collects whatever lies above the last threshold.

    Returns a list of ``(left, right)`` index pairs; the consecutive
    slices ``values[left:right]`` are the bins and together they cover
    every index of *values*.  Because ``deltax`` is rounded down, the
    number of bins can exceed *nbins*.  An empty *values* gives ``[]``.

    Raises ValueError when *nbins* is smaller than 1 or when
    ``values[-1] < values[0]`` (input not sorted).
    """
    from bisect import bisect_right

    if not values:
        return []
    if nbins < 1:
        raise ValueError("nbins must be at least 1")
    vlo, vhi = values[0], values[-1] + 1
    if vhi <= vlo:
        raise ValueError("values must be sorted in ascending order")
    nbins = min(nbins, vhi - vlo)
    deltax = int((vhi - vlo) // nbins)  # >= 1 because nbins <= span
    ranges = []
    left = 0  # left index of the first bin
    val = vlo + deltax
    while val < vhi:
        # first index at or after `left` whose value exceeds the threshold;
        # len(values) when none does, so the last element is not lost
        right = bisect_right(values, val, left)
        ranges.append((left, right))
        left = right
        val = val + deltax
    if left < len(values):
        # values above the last threshold (span not a multiple of deltax)
        ranges.append((left, len(values)))
    return ranges
def basic_stat(seq):
    """Basic statistics on the values in seq.

    *seq* may be any iterable of numbers; it is traversed once and does
    not need a ``len``.  The items may come in any order.

    Returns the 6-tuple ``(count, minimum, maximum, midrange, mean,
    stddev)`` where midrange is ``(minimum + maximum) / 2`` (the quantity
    this script has always reported as "median") and stddev is the
    population standard deviation.

    Raises ValueError when *seq* is empty.
    """
    import math

    s0, s1, s2 = 0, 0.0, 0.0  # sums of order 0, 1 and 2
    Xmin = Xmax = None
    for item in seq:
        Xi = float(item)
        if s0 == 0:
            Xmin = Xmax = Xi
        elif Xi < Xmin:
            Xmin = Xi
        elif Xi > Xmax:
            Xmax = Xi
        s0 = s0 + 1  # seq may be an iterable without len
        s1 = s1 + Xi
        s2 = s2 + Xi*Xi
    if s0 == 0:
        raise ValueError("basic_stat() needs at least one value")
    Xm = s1/s0  # mean value
    midrange = (Xmin + Xmax)*0.5
    # Rounding can push the difference a hair below zero; clamp it so
    # that math.sqrt does not raise on (nearly) constant data.
    variance = max((s2 - s0*Xm*Xm)/s0, 0.0)  # standard deviation ** 2
    stddev = math.sqrt(variance)
    return s0, Xmin, Xmax, midrange, Xm, stddev
if __name__ == '__main__':
    # Import (almost) every module of the standard library, report which
    # modules are still referenced by other modules, then print statistics
    # on the number of exported names.
    names = ['cStringIO', 'sys', 'gc']
    names.extend(browse_stdlib())
    freqs, modules = exports(names, True)
    sys.stdout.write("\n")  # exports() prints without new line
    print("%d imported modules and %d in sys.modules" % (
        len(freqs), len(sys.modules)))
    print("number of unreachable objects %d" % (gc.collect(),))

    simples = []  # names of modules nobody else refers to
    own_globals = globals()
    while modules:
        module = modules.pop()
        # Skip sys.modules and this script's own namespace by identity
        # rather than by position: get_referrers() promises no ordering.
        items = [ref for ref in gc.get_referrers(module)
                 if ref is not sys.modules and ref is not own_globals]
        if not items:
            simples.append(module.__name__)
            sys.modules.pop(module.__name__, None)
            del module, items
        else:
            print("referrers of %s" % (module.__name__,))
            for item in items:
                # a referrer is normally the __dict__ of an importing
                # module, but any container object may show up
                if isinstance(item, dict):
                    name = item.get('__file__', 'unknown')
                else:
                    name = 'unknown'
                stem, ext = os.path.splitext(name)
                if ext in ('.py', '.pyc'):
                    if os.path.basename(stem) == '__init__':
                        # package: show "<package dir><sep>__init__"
                        name = os.path.join(
                            os.path.basename(os.path.dirname(stem)),
                            '__init__')
                    else:
                        name = os.path.basename(stem)
                sys.stdout.write("%s " % (name,))
            del module, items
            sys.stdout.write("\n")
    print("number of unreachable objects %d" % (gc.collect(),))
    print("new length of sys.modules %d" % (len(sys.modules),))
    print("%d simple modules" % (len(simples),))

    freqs.sort()
    values = [item[0] for item in freqs]
    ranges = pm_histo(values)
    # ranges2 and limite are not used below; they are kept as script
    # globals, available for inspection when run with "python -i".
    ranges2 = [item for item in ranges if item[1] > item[0]]
    limite = ranges[0][1] + 1  # first bin
    rangesbas = pm_histo(values[:95], 6)
    print(rangesbas)

    # statistics on consecutive groups of lbin modules
    lbin = 11
    start = 0
    print("St Nb. min max median average stddev")
    fmt = "%3d%3d%6.1f%6.1f%8.3f%8.3f%8.3f"
    while start < len(values):
        res = (start,) + basic_stat(values[start:start+lbin])
        print(fmt % res)
        start = start + lbin
    print("modules with a lot of external names :")
    for item in freqs[140:]:
        print(item)
Parts of output of python -i statsmod.py (python2.4 windows)
repr rexec rfc822 rlcompleter cannot import module rlcompleter
No module named readline robotparser sched
.... etc ...
whrandom C:\Python24\lib\whrandom.py:38: DeprecationWarning: the
whrandom module is deprecated; please use the random module
DeprecationWarning)
xdrlib xmlrpclib zipfile
.... etc ...
_threading_local __future__ __phello__.foo Hello world...
cannot import module __phello__.foo No module named foo
cannot import module __phello__.foo No module named foo
number of unreachable objects 0
referrers of __future__
referrers of __future__
.... etc ...
referrers of socket
asynchat asyncore BaseHTTPServer SocketServer urllib httplib ftplib
imaplib nntplib poplib smtpd smtplib Utils
.... etc ...
referrers of cStringIO
logging\__init__ xmlrpclib
number of unreachable objects 564
new length of sys.modules 154
121 simple modules
[(0, 39), (39, 58), (58, 74), (74, 79), (79, 91), (91, 94)]
St Nb. min max median average stddev
0 11 1.0 1.0 1.000 1.000 0.000
11 11 1.0 2.0 1.500 1.545 0.498
22 11 2.0 2.0 2.000 2.000 0.000
33 11 2.0 3.0 2.500 2.455 0.498
44 11 3.0 3.0 3.000 3.000 0.000
55 11 3.0 4.0 3.500 3.727 0.445
66 11 4.0 5.0 4.500 4.273 0.445
77 11 5.0 6.0 5.500 5.818 0.386
88 11 6.0 8.0 7.000 6.909 0.668
99 11 9.0 10.0 9.500 9.545 0.498
110 11 10.0 12.0 11.000 11.182 0.716
121 11 12.0 16.0 14.000 13.818 1.113
132 11 16.0 21.0 18.500 17.636 1.367
143 11 21.0 29.0 25.000 24.818 2.367
154 11 31.0 51.0 41.000 38.364 7.413
165 11 55.0 92.0 73.500 70.636 10.764
176 5 97.0 136.0 116.500 111.400 13.865
modules with a lot of external names :
(18, 40, 'cgi')
(19, 19, 'cgitb')
.... etc ...
(72, 72, 'pydoc')
(74, 74, 'cookielib')
(78, 78, 'urllib2')
(86, 86, 'symbol')
(92, 92, 'sre_constants')
(97, 97, 'xmlrpclib')
(101, 118, 'os')
(107, 107, 'sre_compile')
(116, 116, 'sre_parse')
(136, 151, 'socket')
Output with python 2.3.3 Linux gives a greater number for socket as the
OpenSSL library is wrapped.
gc.collect() at the interactive prompt gives 0. (good)
Conclusion :
sre_compile and sre_parse should define an __all__ attribute.
The standard library contains a module, 'tzparse', which cannot be imported!
Most library modules do not begin with #!/usr/bin/env python and a
coding cookie.
Regards
break
add((left, right))
left = right
val = val + deltax
return ranges
def basic_stat(seq):
    """Basic statistics on the values in seq.

    *seq* may be any iterable of numbers; it is traversed once and does
    not need a ``len``.  The items may come in any order.

    Returns the 6-tuple ``(count, minimum, maximum, midrange, mean,
    stddev)`` where midrange is ``(minimum + maximum) / 2`` (the quantity
    this script has always reported as "median") and stddev is the
    population standard deviation.

    Raises ValueError when *seq* is empty.
    """
    import math

    s0, s1, s2 = 0, 0.0, 0.0  # sums of order 0, 1 and 2
    Xmin = Xmax = None
    for item in seq:
        Xi = float(item)
        if s0 == 0:
            Xmin = Xmax = Xi
        elif Xi < Xmin:
            Xmin = Xi
        elif Xi > Xmax:
            Xmax = Xi
        s0 = s0 + 1  # seq may be an iterable without len
        s1 = s1 + Xi
        s2 = s2 + Xi*Xi
    if s0 == 0:
        raise ValueError("basic_stat() needs at least one value")
    Xm = s1/s0  # mean value
    midrange = (Xmin + Xmax)*0.5
    # Rounding can push the difference a hair below zero; clamp it so
    # that math.sqrt does not raise on (nearly) constant data.
    variance = max((s2 - s0*Xm*Xm)/s0, 0.0)  # standard deviation ** 2
    stddev = math.sqrt(variance)
    return s0, Xmin, Xmax, midrange, Xm, stddev
if __name__ == '__main__':
    # Import (almost) every module of the standard library, report which
    # modules are still referenced by other modules, then print statistics
    # on the number of exported names.
    names = ['cStringIO', 'sys', 'gc']
    names.extend(browse_stdlib())
    freqs, modules = exports(names, True)
    sys.stdout.write("\n")  # exports() prints without new line
    print("%d imported modules and %d in sys.modules" % (
        len(freqs), len(sys.modules)))
    print("number of unreachable objects %d" % (gc.collect(),))

    simples = []  # names of modules nobody else refers to
    own_globals = globals()
    while modules:
        module = modules.pop()
        # Skip sys.modules and this script's own namespace by identity
        # rather than by position: get_referrers() promises no ordering.
        items = [ref for ref in gc.get_referrers(module)
                 if ref is not sys.modules and ref is not own_globals]
        if not items:
            simples.append(module.__name__)
            sys.modules.pop(module.__name__, None)
            del module, items
        else:
            print("referrers of %s" % (module.__name__,))
            for item in items:
                # a referrer is normally the __dict__ of an importing
                # module, but any container object may show up
                if isinstance(item, dict):
                    name = item.get('__file__', 'unknown')
                else:
                    name = 'unknown'
                stem, ext = os.path.splitext(name)
                if ext in ('.py', '.pyc'):
                    if os.path.basename(stem) == '__init__':
                        # package: show "<package dir><sep>__init__"
                        name = os.path.join(
                            os.path.basename(os.path.dirname(stem)),
                            '__init__')
                    else:
                        name = os.path.basename(stem)
                sys.stdout.write("%s " % (name,))
            del module, items
            sys.stdout.write("\n")
    print("number of unreachable objects %d" % (gc.collect(),))
    print("new length of sys.modules %d" % (len(sys.modules),))
    print("%d simple modules" % (len(simples),))

    freqs.sort()
    values = [item[0] for item in freqs]
    ranges = pm_histo(values)
    # ranges2 and limite are not used below; they are kept as script
    # globals, available for inspection when run with "python -i".
    ranges2 = [item for item in ranges if item[1] > item[0]]
    limite = ranges[0][1] + 1  # first bin
    rangesbas = pm_histo(values[:95], 6)
    print(rangesbas)

    # statistics on consecutive groups of lbin modules
    lbin = 11
    start = 0
    print("St Nb. min max median average stddev")
    fmt = "%3d%3d%6.1f%6.1f%8.3f%8.3f%8.3f"
    while start < len(values):
        res = (start,) + basic_stat(values[start:start+lbin])
        print(fmt % res)
        start = start + lbin
    print("modules with a lot of external names :")
    for item in freqs[140:]:
        print(item)
Parts of output of python -i statsmod.py (python2.4 windows)
repr rexec rfc822 rlcompleter cannot import module rlcompleter
No module named readline robotparser sched
.... etc ...
whrandom C:\Python24\lib\whrandom.py:38: DeprecationWarning: the
whrandom module is deprecated; please use the random module
DeprecationWarning)
xdrlib xmlrpclib zipfile
.... etc ...
_threading_local __future__ __phello__.foo Hello world...
cannot import module __phello__.foo No module named foo
cannot import module __phello__.foo No module named foo
number of unreachable objects 0
referrers of __future__
referrers of __future__
.... etc ...
referrers of socket
asynchat asyncore BaseHTTPServer SocketServer urllib httplib ftplib
imaplib nntplib poplib smtpd smtplib Utils
.... etc ...
referrers of cStringIO
logging\__init__ xmlrpclib
number of unreachable objects 564
new length of sys.modules 154
121 simple modules
[(0, 39), (39, 58), (58, 74), (74, 79), (79, 91), (91, 94)]
St Nb. min max median average stddev
0 11 1.0 1.0 1.000 1.000 0.000
11 11 1.0 2.0 1.500 1.545 0.498
22 11 2.0 2.0 2.000 2.000 0.000
33 11 2.0 3.0 2.500 2.455 0.498
44 11 3.0 3.0 3.000 3.000 0.000
55 11 3.0 4.0 3.500 3.727 0.445
66 11 4.0 5.0 4.500 4.273 0.445
77 11 5.0 6.0 5.500 5.818 0.386
88 11 6.0 8.0 7.000 6.909 0.668
99 11 9.0 10.0 9.500 9.545 0.498
110 11 10.0 12.0 11.000 11.182 0.716
121 11 12.0 16.0 14.000 13.818 1.113
132 11 16.0 21.0 18.500 17.636 1.367
143 11 21.0 29.0 25.000 24.818 2.367
154 11 31.0 51.0 41.000 38.364 7.413
165 11 55.0 92.0 73.500 70.636 10.764
176 5 97.0 136.0 116.500 111.400 13.865
modules with a lot of external names :
(18, 40, 'cgi')
(19, 19, 'cgitb')
.... etc ...
(72, 72, 'pydoc')
(74, 74, 'cookielib')
(78, 78, 'urllib2')
(86, 86, 'symbol')
(92, 92, 'sre_constants')
(97, 97, 'xmlrpclib')
(101, 118, 'os')
(107, 107, 'sre_compile')
(116, 116, 'sre_parse')
(136, 151, 'socket')
Output with python 2.3.3 Linux gives a greater number for socket as the
OpenSSL library is wrapped.
gc.collect() at the interactive prompt gives 0. (good)
Conclusion :
sre_compile and sre_parse should define an __all__ attribute.
The standard library contains a module, 'tzparse', which cannot be imported!
Most library modules do not begin with #!/usr/bin/env python and a
coding cookie.
Regards