Problem--IOError: [Errno 13] Permission denied

P

patrick.waldo

Hi all,

After sludging my way through many obstacles with this interesting
puzzle of a text parsing program, I found myself with one final error:

Traceback (most recent call last):
File "C:\Python24\Lib\site-packages\pythonwin\pywin\framework
\scriptutils.py", line 310, in RunScript
exec codeObject in __main__.__dict__
File "C:\Documents and Settings\Patrick Waldo\My Documents\Python
\WORD\try5-2-file-1-all patterns.py", line 77, in ?
input = codecs.open(input_text, 'r','utf8')
File "C:\Python24\lib\codecs.py", line 666, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 13] Permission denied: 'C:\\text_samples\\test\
\output'

The error doesn't stop the program from functioning as it should,
except the last line of every document gets split with | in between
the words, which is just strange. I have no idea why either is
happening, but perhaps they are related.

Any ideas?

#For text files in a directory...
#Analyzes a randomly organized UTF8 document with EINECS, CAS,
Chemical, and Chemical Formula
#into a document structured as EINECS|CAS|Chemical|Chemical Formula.

import os
import codecs
import re

path = "C:\\text_samples\\test\\"
path2 = "C:\\text_samples\\test\\output\\"

EINECS = re.compile(r'^\d\d\d-\d\d\d-\d$')
FORMULA = re.compile(r'([A-Z][a-zA-Z0-9]*\.?[A-Za-z0-9]*/?[A-Za-
z0-9]*)')
FALSE_POS = re.compile(r'^[A-Z][a-z]{4,40}\)?\.?')
FALSE_POS1 = re.compile(r'C\.I\..*')
FALSE_POS2 = re.compile(r'vit.*')
FALSE_NEG = re.compile(r'C\d+\.')

def iter_elements(tokens):
product = []
for tok in tokens:
if EINECS.match(tok) and len(product) >= 3:
match = re.match(FORMULA,product[-1])
match_false_pos = re.match(FALSE_POS,product[-1])
match_false_pos1 = re.match(FALSE_POS1,product[-1])
match_false_pos2 = re.match(FALSE_POS2,product[2])
match_false_neg = re.match(FALSE_NEG,product[-1])
if match_false_neg:
product[2:-1] = [' '.join(product[2:])]
del product[-1]
yield product
product = []
elif match_false_pos:
product[2:-1] = [' '.join(product[2:])]
del product[-1]
yield product
product = []
elif match:
product[2:-1] = [' '.join(product[2:-1])]
yield product
product = []
elif match_false_pos1 or match_false_pos2:
product[2:-1] = [' '.join(product[2:])]
del product[-1]
yield product
product = []
else:
product[2:-1] = [' '.join(product[2:])]
del product[-1]
yield product
product = []
product.append(tok)
yield product

for text in os.listdir(path):
input_text = os.path.join(path,text)
output_text = os.path.join(path2,text)
input = codecs.open(input_text, 'r','utf8')
output = codecs.open(output_text, 'w', 'utf8')
tokens = input.read().split()
for element in iter_elements(tokens):
output.write('|'.join(element))
output.write("\r\n")
input.close()
output.close()
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads


Members online

Forum statistics

Threads
473,769
Messages
2,569,580
Members
45,054
Latest member
TrimKetoBoost

Latest Threads

Top