K
Kriston-Vizi Janos
Dear Mr. Kern, and Members,
Thank you very much for the fast answer, my question became
over-simplified.
My source code is appended below. It uses two text files (L.txt and
GC.txt) as input and merges them. Please find these two files here:
http://kristonvizi.hu/L.txt
http://kristonvizi.hu/GC.txt
Both L.txt and GC.txt contains 3000 rows. When running, the code stops
with error message:
'The debugged program raised the exception IndexError "list index out of
range"
File: /home/kvjanos/file.py, Line: 91'
And I noticed that all the lists that should contain 3000 items,
contains less as follows:
NIR_mean_l = 1000 items
NIR_stdev_l = 1000 items
R_mean_l = 1000 items
R_stdev_l = 1000 items
G_mean_l = 999 items
G_stdev_l = 999 items
area_l = 999 items
NIR_mean_gc = 1000 items
NIR_stdev_gc = 1000 items
R_mean_gc = 1000 items
R_stdev_gc = 1000 items
G_mean_gc = 999 items
G_stdev_gc = 999 items
area_gc = 999 items
This is why I thought it is a limit in list items number.
Code that's failing:
#*******************************************
import string,sys,os,sets
# Open L, GC txt files and create merged file
inp_file_l = open('/home/kvjanos/L/L.txt')
inp_file_gc = open('/home/kvjanos/GC/GC.txt')
out_file = open('/home/kvjanos/L_GC_merged/merged.txt', 'w')
# Define L lists
NIR_mean_l = []
NIR_stdev_l =[]
R_mean_l = []
R_stdev_l =[]
G_mean_l = []
G_stdev_l =[]
area_l = []
# Define GC lists
NIR_mean_gc = []
NIR_stdev_gc =[]
R_mean_gc = []
R_stdev_gc =[]
G_mean_gc = []
G_stdev_gc =[]
area_gc = []
# Processing L file
line_no_l =0 # Input L file line number
type_l = 1 # Input L file row type: 1 (row n),2 (row n+1) or 3 (row n+2)
# Append L values to lists.
for line in inp_file_l.xreadlines():
line_no_l = line_no_l + 1
if line_no_l == 1: # To skip the header row
continue
data_l = [] # An L row
data_l = line.split()
if type_l == 1:
NIR_mean_l.append(data_l[2]) # Append 3rd item of the row to
the list
NIR_stdev_l.append(data_l[3]) # Append 4th item of the row to
the list
type_l = 2 # Change to row n+1
else:
if type_l == 2:
R_mean_l.append(data_l[2])
R_stdev_l.append(data_l[3])
type_l = 3
else:
G_mean_l.append(data_l[2])
G_stdev_l.append(data_l[3])
area_l.append(data_l[1])
type_l = 1
inp_file_l.close()
# Processing GC file, the same way as L file above
line_no_gc =0
type_gc = 1
for line in inp_file_gc.xreadlines():
line_no_gc = line_no_gc+ 1
if line_no_gc== 1:
continue
data_gc = []
data_gc = line.split()
if type_gc== 1:
NIR_mean_gc.append(data_gc[2])
NIR_stdev_gc.append(data_gc[3])
type_gc= 2
else:
if type_gc== 2:
R_mean_gc.append(data_gc[2])
R_stdev_gc.append(data_gc[3])
type_gc= 3
else:
G_mean_gc.append(data_gc[2])
G_stdev_gc.append(data_gc[3])
area_gc.append(data_gc[1])
type_gc= 1
inp_file_gc.close()
#############################
# Create output rows from lists
for i in range(len(NIR_mean_l)): # Process all input rows
# Filters L rows by 'area_l' values
area_l_rossz = string.atof(area_l)
if area_l_rossz < 10000:
continue
elif area_l_rossz > 100000:
continue
# Filters GC rows by 'area_gc' values
area_gc_rossz = string.atof(area_gc)
if area_gc_rossz < 10000:
continue
elif area_gc_rossz > 200000:
continue
# Create output line and write out
newline = []
newline.append(str(i+1))
# L
newline.append(NIR_mean_l)
newline.append(NIR_stdev_l)
newline.append(R_mean_l)
newline.append(R_stdev_l)
newline.append(G_mean_l)
newline.append(G_stdev_l)
newline.append(area_l)
# GC
newline.append(NIR_mean_gc)
newline.append(NIR_stdev_gc)
newline.append(R_mean_gc)
newline.append(R_stdev_gc)
newline.append(G_mean_gc)
newline.append(G_stdev_gc)
newline.append(area_gc)
outline = string.join(newline,'\t') + '\n'
out_file.writelines(outline)
out_file.close()
#*******************************************
Thnx again,
Janos
Thank you very much for the fast answer, my question became
over-simplified.
My source code is appended below. It uses two text files (L.txt and
GC.txt) as input and merges them. Please find these two files here:
http://kristonvizi.hu/L.txt
http://kristonvizi.hu/GC.txt
Both L.txt and GC.txt contains 3000 rows. When running, the code stops
with error message:
'The debugged program raised the exception IndexError "list index out of
range"
File: /home/kvjanos/file.py, Line: 91'
And I noticed that all the lists that should contain 3000 items,
contains less as follows:
NIR_mean_l = 1000 items
NIR_stdev_l = 1000 items
R_mean_l = 1000 items
R_stdev_l = 1000 items
G_mean_l = 999 items
G_stdev_l = 999 items
area_l = 999 items
NIR_mean_gc = 1000 items
NIR_stdev_gc = 1000 items
R_mean_gc = 1000 items
R_stdev_gc = 1000 items
G_mean_gc = 999 items
G_stdev_gc = 999 items
area_gc = 999 items
This is why I thought it is a limit in list items number.
Code that's failing:
#*******************************************
import string,sys,os,sets
# Open L, GC txt files and create merged file
inp_file_l = open('/home/kvjanos/L/L.txt')
inp_file_gc = open('/home/kvjanos/GC/GC.txt')
out_file = open('/home/kvjanos/L_GC_merged/merged.txt', 'w')
# Define L lists
NIR_mean_l = []
NIR_stdev_l =[]
R_mean_l = []
R_stdev_l =[]
G_mean_l = []
G_stdev_l =[]
area_l = []
# Define GC lists
NIR_mean_gc = []
NIR_stdev_gc =[]
R_mean_gc = []
R_stdev_gc =[]
G_mean_gc = []
G_stdev_gc =[]
area_gc = []
# Processing L file
line_no_l =0 # Input L file line number
type_l = 1 # Input L file row type: 1 (row n),2 (row n+1) or 3 (row n+2)
# Append L values to lists.
for line in inp_file_l.xreadlines():
line_no_l = line_no_l + 1
if line_no_l == 1: # To skip the header row
continue
data_l = [] # An L row
data_l = line.split()
if type_l == 1:
NIR_mean_l.append(data_l[2]) # Append 3rd item of the row to
the list
NIR_stdev_l.append(data_l[3]) # Append 4th item of the row to
the list
type_l = 2 # Change to row n+1
else:
if type_l == 2:
R_mean_l.append(data_l[2])
R_stdev_l.append(data_l[3])
type_l = 3
else:
G_mean_l.append(data_l[2])
G_stdev_l.append(data_l[3])
area_l.append(data_l[1])
type_l = 1
inp_file_l.close()
# Processing GC file, the same way as L file above
line_no_gc =0
type_gc = 1
for line in inp_file_gc.xreadlines():
line_no_gc = line_no_gc+ 1
if line_no_gc== 1:
continue
data_gc = []
data_gc = line.split()
if type_gc== 1:
NIR_mean_gc.append(data_gc[2])
NIR_stdev_gc.append(data_gc[3])
type_gc= 2
else:
if type_gc== 2:
R_mean_gc.append(data_gc[2])
R_stdev_gc.append(data_gc[3])
type_gc= 3
else:
G_mean_gc.append(data_gc[2])
G_stdev_gc.append(data_gc[3])
area_gc.append(data_gc[1])
type_gc= 1
inp_file_gc.close()
#############################
# Create output rows from lists
for i in range(len(NIR_mean_l)): # Process all input rows
# Filters L rows by 'area_l' values
area_l_rossz = string.atof(area_l)
if area_l_rossz < 10000:
continue
elif area_l_rossz > 100000:
continue
# Filters GC rows by 'area_gc' values
area_gc_rossz = string.atof(area_gc)
if area_gc_rossz < 10000:
continue
elif area_gc_rossz > 200000:
continue
# Create output line and write out
newline = []
newline.append(str(i+1))
# L
newline.append(NIR_mean_l)
newline.append(NIR_stdev_l)
newline.append(R_mean_l)
newline.append(R_stdev_l)
newline.append(G_mean_l)
newline.append(G_stdev_l)
newline.append(area_l)
# GC
newline.append(NIR_mean_gc)
newline.append(NIR_stdev_gc)
newline.append(R_mean_gc)
newline.append(R_stdev_gc)
newline.append(G_mean_gc)
newline.append(G_stdev_gc)
newline.append(area_gc)
outline = string.join(newline,'\t') + '\n'
out_file.writelines(outline)
out_file.close()
#*******************************************
Thnx again,
Janos