3.2 can't extract tarfile produced by 2.7

A

Antoon Pardon

I am converting some programs to python 3. These programs manipulate
tarfiles. In order for the python3 programs to be really useful
they need to be able to process the tarfiles produced by python2 that
however seems to be a problem.

This is testcode that produces a tarfile.

#! /usr/bin/python

compression = "bz2"
tarmode = "w|%s" % compression
rt = '.'

import os
import os.path
import errno

import tarfile as tar

def process():
pj = os.path.join
entries = os.listdir(rt)
of = open("DUMP.tbz", "w")
tf = tar.open(mode = tarmode, fileobj = of,
encoding = 'ascii', format = tar.PAX_FORMAT)
for entry in entries:
fqpn = pj(rt, entry)
try:
tf.add(fqpn, entry, recursive = False)
except OSError as ErrInfo:
print("%s: disappeared" % fqpn)
if ErrInfo.errno != errno.ENOENT:
raise
tf.close()
of.close()

if __name__ == "__main__":
process()

==============================================================================
This is testcode that checks a tarfile

#!/usr/bin/python

compression = "bz2"
tarmode = "r|%s" % compression

import os
import os.path
import stat

import tarfile as tar

def equalfile(fl1, fl2):
bf1 = fl1.read(8192)
bf2 = fl2.read(8192)
while bf1 == bf2:
if bf1 == "":
return True
bf1 = fl1.read(8192)
bf2 = fl2.read(8192)
return False

def process():
gf = open("DUMP.tbz", "r")
tf = tar.open(mode = tarmode, fileobj = gf,
encoding = 'ascii', format = tar.PAX_FORMAT)
for tarinfo in tf:
entry = tarinfo.name
fileinfo = os.stat(entry)
if stat.S_ISREG(fileinfo.st_mode) and tarinfo.isreg():
bfl = tf.extractfile(tarinfo)
ofl = open(entry)
if not equalfile(bfl, ofl):
print("%s: does not match backup" % entry)
sync = False
tf.close()
gf.close()

if __name__ == "__main__":
process()

=================================================================================

When I use python2.7 to produce and later check the tarfile everything
works as expected. However when I use python3.2 to check the tarfile I
get the following traceback.

Traceback (most recent call last):
File "tarchck", line 39, in <module>
process()
File "tarchck", line 25, in process
encoding = 'ascii', format = tar.PAX_FORMAT)
File "/usr/lib/python3.2/tarfile.py", line 1771, in open
t = cls(name, filemode, stream, **kwargs)
File "/usr/lib/python3.2/tarfile.py", line 1667, in __init__
self.firstmember = self.next()
File "/usr/lib/python3.2/tarfile.py", line 2418, in next
tarinfo = self.tarinfo.fromtarfile(self)
File "/usr/lib/python3.2/tarfile.py", line 1281, in fromtarfile
buf = tarfile.fileobj.read(BLOCKSIZE)
File "/usr/lib/python3.2/tarfile.py", line 573, in read
buf = self._read(size)
File "/usr/lib/python3.2/tarfile.py", line 585, in _read
buf = self.__read(self.bufsize)
File "/usr/lib/python3.2/tarfile.py", line 604, in __read
buf = self.fileobj.read(self.bufsize)
File "/usr/lib/python3.2/codecs.py", line 300, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9e in position 10:
invalid start byte

I have been looking around but have no idea how I have to adapt this
code in order to have it process the tarfile under python3.2. The
original code didn't have the coding and format keywords on the tar.open
statement and after reading the documentation I thought that
would make things work, but no such luck. Further reading didn't
provide anything usefull
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,755
Messages
2,569,537
Members
45,020
Latest member
GenesisGai

Latest Threads

Top