Zipfile module errors

J

jwesonga

Hi,

I have a python script that supposed to go through a folder, pick the
zipped files, unzip them and process the data inside. I'm not sure
where i'm going wrong with this script because it all seems correct:

#! /usr/bin/env python
import zipfile
import os
from elementtree import ElementTree as ET
import MySQLdb
import sys

sys.stdout = open("log",'w')
sys.stderr = open("log",'w')

username = 'xxx'
password = 'xxx'
host = 'xxx'
database = 'xxx'

infolder = "/home/username/received/"
outfolder = "/home/username/webapps/app1/public/processed/"
class RecursiveFileIterator:
def __init__ (self, *rootDirs):
self.dirQueue = list (rootDirs)
self.includeDirs = None
self.fileQueue = []

def __getitem__ (self, index):
while len (self.fileQueue) == 0:
self.nextDir ()
result = self.fileQueue [0]
del self.fileQueue [0]
return result

def nextDir (self):
dir = self.dirQueue [0] # fails with IndexError, which is
fine
# for iterator interface
del self.dirQueue [0]
list = os.listdir (dir)
join = os.path.join
isdir = os.path.isdir
for basename in list:
fullPath = join (dir, basename)
if isdir (fullPath):
self.dirQueue.append (fullPath)
if self.includeDirs:
self.fileQueue.append (fullPath)
else:
self.fileQueue.append (fullPath)

def unzip(folder):
filelist = RecursiveFileIterator(folder)
for one in filelist:
xmlname = one.replace(".zip",".xml")
pngname = one.replace(".zip",".png")
mp3name = one.replace(".zip",".mp3")
zfile = zipfile.ZipFile(one,'r')
for filename in zfile.namelist():
data = zfile.read(filename)
file = open(infolder+"/"+filename,'w')
file.write(data)
file.close()
parse_xml(xmlname)
os.rename(xmlname,outfolder+xmlname.strip(infolder+'/'))
try:
os.rename(pngname,outfolder+pngname.strip(infolder+'/'))
except:
print one+" has no picture file"
try:
os.rename(mp3name,outfolder+mp3name.strip(infolder+'/'))
except:
print one+" has no sound file"
os.remove(one)

def parse_xml(filename):
topic =[]
f = open(filename,'r')
#print f
content = f.read()
f.close()
#print content
element = ET.XML(content)
#Extract the elements from xml file
for subelement in element:
if subelement.tag=='datestamp':
date = str(subelement.text)
if subelement.tag=='properties':
for each in subelement:
if each.tag=='name':
name = str(each.text)
elif each.tag=='age':
age = str(each.text)
elif each.tag=='gender':
gender = str(each.text)
elif each.tag=='email':
email = str(each.text)
elif each.tag=='language':
language = str(each.text)
elif each.tag=='otherlanguage':
otherlanguage = str(each.text)
elif each.tag=='country':
country = str(each.text)
elif each.tag=='city':
city = str(each.text)
elif each.tag=='referral':
referral = str(each.text)
if subelement.tag=='recording':
for sub_subelement in subelement:
if sub_subelement.tag=='duration':
duration = str(sub_subelement.text)
if sub_subelement.tag=='file':
sound = str(sub_subelement.text)
if sub_subelement.tag=='image':
picture = str(sub_subelement.text)
if subelement.tag=='summary':
summary = str(subelement.text)
if subelement.tag=='categories':
for sub_subelement in subelement:
if sub_subelement.text == 'True':
topic.append(str(sub_subelement.tag))
if sub_subelement.tag == 'othercategory':
topic.append(str(sub_subelement.text))

db=MySQLdb.connect(host=host,user=username,passwd=password,db=database)
c = db.cursor()
topic = str(topic)
c.execute("""INSERT INTO
stories(name,age,gender,email,language,otherlanguage,country,city,referral,duration,audiofilename,picture,summary,topic)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
(name,age,gender,email,language,otherlanguage,country,city,referral,duration,sound,picture,summary,topic))

unzip(infolder)

The error I keep getting is:

Traceback (most recent call last):
File "processor3.py", line 124, in ?
unzip(infolder)
File "processor3.py", line 53, in unzip
zfile = zipfile.ZipFile(one,'r')
File "/usr/lib/python2.4/zipfile.py", line 210, in __init__
self._GetContents()
File "/usr/lib/python2.4/zipfile.py", line 230, in _GetContents
self._RealGetContents()
File "/usr/lib/python2.4/zipfile.py", line 240, in _RealGetContents
endrec = _EndRecData(fp)
File "/usr/lib/python2.4/zipfile.py", line 83, in _EndRecData
fpin.seek(-22, 2) # Assume no archive comment.
IOError: [Errno 22] Invalid argument


I have confirmed that the folder contains the zipped files, and that
the zipped files contain the xml file. No reason why it shouldn't
work. The server is running Python 2.4. Please help me.
 
J

John Machin

Hi,

I have a python script that supposed to go through a folder, pick the
zipped files, unzip them and process the data inside. I'm not sure
where i'm going wrong with this script because it all seems correct:

Nothing is ever as it seems. Let's try to work backwards from the
error message ... and we don't need your magnificent script, just the
traceback will do for now, so:

[ big snip]
The error I keep getting is:

Traceback (most recent call last):
File "processor3.py", line 124, in ?
unzip(infolder)
File "processor3.py", line 53, in unzip

The error says that you are trying to seek 22 bytes backwards from the
end of a file that you presume is a zip file, and this is deemed to be
invalid. Hypotheses: (1) zipfile.py is buggy (2) your file is less
than 22 bytes long. Let's park hypothesis 1 for the moment. Insert the
following code before the call to zipfile.ZipFile:

print "trying to unzip %r whose size is %d bytes" \
% (one, os.stat(one).st_size)

and tell us your conclusions.
zfile = zipfile.ZipFile(one,'r')
File "/usr/lib/python2.4/zipfile.py", line 210, in __init__
self._GetContents()
File "/usr/lib/python2.4/zipfile.py", line 230, in _GetContents
self._RealGetContents()
File "/usr/lib/python2.4/zipfile.py", line 240, in _RealGetContents
endrec = _EndRecData(fp)
File "/usr/lib/python2.4/zipfile.py", line 83, in _EndRecData
fpin.seek(-22, 2) # Assume no archive comment.
IOError: [Errno 22] Invalid argument

P.S. Printing the contents of filelist immediately after it's been
created might be a good idea. You say "pick the zipped files" but the
only condition I see is a test using os.path.isdir.

HTH,
John
 
J

John Machin

jwesonga said:
I've added the line to the script, added a zipped file into the
folder. I've made sure the file exists. The error is now this:

Please get some clues:
(1) Don't reply off-list unless specifically invited.
(2) Don't top-post.
(3) Do read and try to understand *all* of each reply that you get ...
e.g. """
P.S. Printing the contents of filelist immediately after it's been
created might be a good idea. You say "pick the zipped files" but the
only condition I see is a test using os.path.isdir.
"""

(4) Do read and try to understand the output from your own script, e.g.
[jwesonga@web38 processor_files]$ python2.4 processor3.py
trying to unzip '/home/jwesonga/received/log.txt' whose size is 752
bytes

Doesn't that tell you anything? Like you are trying to unzip your own
logfile??

Traceback (most recent call last):
File "processor3.py", line 125, in ?
unzip(infolder)
File "processor3.py", line 54, in unzip
zfile = zipfile.ZipFile(one,'r')
File "/usr/lib/python2.4/zipfile.py", line 210, in __init__
self._GetContents()
File "/usr/lib/python2.4/zipfile.py", line 230, in _GetContents
self._RealGetContents()
File "/usr/lib/python2.4/zipfile.py", line 242, in _RealGetContents
raise BadZipfile, "File is not a zip file"
zipfile.BadZipfile: File is not a zip file

This is strange because I can see the zipped file inside the folder /
home/jwesonga/received what could be the problem?

Hi,
I have a python script that supposed to go through a folder, pick the
zipped files, unzip them and process the data inside. I'm not sure
where i'm going wrong with this script because it all seems correct:
Nothing is ever as it seems. Let's try to work backwards from the
error message ... and we don't need your magnificent script, just the
traceback will do for now, so:

[ big snip]


The error I keep getting is:
Traceback (most recent call last):
File "processor3.py", line 124, in ?
unzip(infolder)
File "processor3.py", line 53, in unzip
The error says that you are trying to seek 22 bytes backwards from the
end of a file that you presume is a zip file, and this is deemed to be
invalid. Hypotheses: (1) zipfile.py is buggy (2) your file is less
than 22 bytes long. Let's park hypothesis 1 for the moment. Insert the
following code before the call to zipfile.ZipFile:

print "trying to unzip %r whose size is %d bytes" \
% (one, os.stat(one).st_size)

and tell us your conclusions.
zfile = zipfile.ZipFile(one,'r')
File "/usr/lib/python2.4/zipfile.py", line 210, in __init__
self._GetContents()
File "/usr/lib/python2.4/zipfile.py", line 230, in _GetContents
self._RealGetContents()
File "/usr/lib/python2.4/zipfile.py", line 240, in _RealGetContents
endrec = _EndRecData(fp)
File "/usr/lib/python2.4/zipfile.py", line 83, in _EndRecData
fpin.seek(-22, 2) # Assume no archive comment.
IOError: [Errno 22] Invalid argument
P.S. Printing the contents of filelist immediately after it's been
created might be a good idea. You say "pick the zipped files" but the
only condition I see is a test using os.path.isdir.

HTH,
John
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads


Members online

Forum statistics

Threads
473,769
Messages
2,569,579
Members
45,053
Latest member
BrodieSola

Latest Threads

Top