Duncan said:
"for line in file.readlines():" reads the entire file into memory and splits
it up into a list of lines, then iterates over the list. If you break from the
loop, tough: you've lost any lines that were read but that you didn't handle.
"for line in file:" reads part of the file and strips off one line at a time.
It never creates a list, and it reads more only when it runs out of the block
it read. If you break from the loop you can do another 'for line in file' and
get the remaining lines.
However, one thing that bit me was that you can't use f.tell() to get the
current position of the line in the file. If you use "for line in
fileobject:" and the first statement in the loop is fileobject.tell(), it
will return the end-of-file position and not the position of the next line.
That might be a bit counter-intuitive.
I am learning to be a better Python programmer and I have written this
small program to parse mailbox files and display emails which match the
specified text. Any comments on this will be appreciated. I know I can read
the whole file using readlines(), but I am not sure if that is a good idea.
Batigol:~/pgrep hari$ cat pgrep.py
import sys
# Text that build() looks for in a line to detect the start of a new email.
emailstart = "From -"
# Number of matching emails recorded so far; build() also uses it as the next
# key when storing into the two dictionaries below.
count = 0
# Match number -> list of file offsets delimiting that email (filled by build()).
hits = dict()
# Match number -> list of the lines in that email that contained the search text.
lines = dict()
def build(f, str):
    """Scan the mailbox *f* and record every email containing the text *str*.

    The file is rewound and read line by line with ``readline()`` (not by
    iterating over the file object) so that ``f.tell()`` reports the offset
    of the next line rather than the end of a read-ahead buffer.

    For each email with at least one matching line, the module-level
    dictionaries are updated under the key ``count``:

    * ``hits[count]``  -- ``[start, end]`` file offsets, where ``start`` is the
      position just after the email's separator line and ``end`` is the
      position just after the email's last line;
    * ``lines[count]`` -- the list of lines that contained *str*;

    and ``count`` is then incremented.

    Args:
        f: A seekable file object opened for reading.
        str: The text to search for in each line.
    """
    global count, hits, lines
    f.seek(0)
    start_email = 0   # offset just past the most recent separator line
    end_email = 0     # offset just past the most recently processed line
    str_matched = []  # matching lines collected for the current email
    line = f.readline()
    while line != '':
        # A separator only counts at the start of a line; matching it
        # anywhere would split an email whose body mentions the marker.
        if line.startswith(emailstart):
            if str_matched:
                # The previous email had matches: close it off before the
                # start offset is moved to the new email.
                hits[count] = [start_email, end_email]
                lines[count] = str_matched
                count += 1
                str_matched = []
            start_email = f.tell()
        if str in line:
            str_matched.append(line)
        end_email = f.tell()
        line = f.readline()
    # The last email has no following separator to trigger the bookkeeping
    # above, so record it here; otherwise a match in the final (or only)
    # email would be lost.
    if str_matched:
        hits[count] = [start_email, end_email]
        lines[count] = str_matched
        count += 1
def display(f):
    """List the recorded matches and print the email the user selects.

    Reads the module-level ``count``, ``hits`` and ``lines``. If ``count`` is
    zero, "Not found!" is printed and the program exits with status 0.
    Otherwise each matching line is listed prefixed with the number of the
    email it belongs to, one number is read from standard input, and the
    text of ``f`` between the two offsets stored in ``hits`` for that email
    is written to standard output.

    Args:
        f: The seekable file object whose offsets are stored in ``hits``.
    """
    if count == 0:
        sys.stdout.write("Not found! \n")
        sys.stdout.flush()
        sys.exit(0)
    sys.stdout.write("#: Line Contents\n")
    for i in range(count):
        # lines[i] holds every matching line of email i; each is listed
        # under that email's number so the user knows what to type.
        for matched_line in lines[i]:
            sys.stdout.write("%s: %s" % (i, matched_line))
    sys.stdout.write("Enter # of email to display: ")
    sys.stdout.flush()
    reply = sys.stdin.readline()
    try:
        # ValueError: not a number (or malformed entry); KeyError: no such email.
        start, end = hits[int(reply.strip())]
    except (ValueError, KeyError):
        sys.stderr.write("Invalid choice\n")
    else:
        f.seek(start)
        while f.tell() != end:
            text = f.readline()
            if text == '':
                # End of file reached before the end offset: stop instead
                # of looping forever on empty reads.
                break
            sys.stdout.write(text)
    sys.stdout.flush()
if __name__ == "__main__":
    # Usage: pgrep.py <mailbox file> <search text>
    # Both arguments are required; check up front so a missing one produces
    # a usage message rather than a misleading error or a traceback.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s <mailbox file> <search text>\n" % sys.argv[0])
        sys.exit(2)
    try:
        # open() rather than file(): file() no longer exists in Python 3.
        f = open(sys.argv[1], "r")
    except (IOError, OSError):
        sys.stdout.write("Error opening file\n")
        sys.exit(1)
    try:
        build(f, sys.argv[2])
        response = 'n'
        # Keep offering the list of matches until the user answers anything
        # other than 'n' to the quit prompt.
        while response == 'n':
            display(f)
            sys.stdout.write("Do you want to quit, y or n? ")
            sys.stdout.flush()
            response = sys.stdin.readline().strip()
    finally:
        # display() may leave via sys.exit(); close the file on that path too.
        f.close()
    sys.exit(0)
Thanks,
Hari