T
Tim
I ran into a problem with a script I was playing with to check code
indents and need some direction. It seems to depend on whether tabsize is
set to 4 in the editor and whether space and tab indents are mixed on
consecutive lines. It works fine when the editor's tabsize is 8, regardless
of whether indents are mixed.
Below are the layouts of the 3 test files, the sample code, and the output
I get.
Any help on how to detect this correctly would be appreciated.
# nano -T4 tabspacing_4.py
class Test:
"""triple quote""" #indent is 1 tab
def __init__(self, msg): #indent is 4 spaces <<
this gets reported as a dedent when there is no change in indent level
self.msg = msg #indent is 2 tabs
#nano -T8 tabspacing_8A.py
class Test:
"""triple quote""" #indent is 1 tab
def __init__(self, msg): #indent is 8 spaces << no
indent change reported
self.msg = msg #indent is 1 tab + 4 spaces
#nano -T8 tabspacing_8B.py
class Test:
"""triple quote""" #indent is 1 tab
def __init__(self, msg): #indent is 1 tab <<
no indent change reported
self.msg = msg #indent is 1 tab + 4 spaces
My script
#!/usr/bin/env python
"""Report every INDENT/DEDENT token found in a Python source file.

Usage: sample.py FILE [TABSIZE]

The tokenize module always treats a tab as advancing to the next multiple
of 8 columns.  A file written in an editor whose tab size is 4, with tab
and space indents mixed on consecutive lines, therefore produces indent
changes that are not visible in the editor.  Passing the editor's tab size
as TABSIZE expands the leading tabs before tokenizing so the reported
levels match what the editor shows.  TABSIZE defaults to 8, which gives
the same columns tokenize computes on its own.
"""
import sys
import tokenize

DEFAULT_TABSIZE = 8


def expand_indent(line, tabsize):
    """Return *line* with tabs in its leading whitespace expanded.

    Only the indentation is rewritten; tabs after the first non-blank
    character are left untouched.
    """
    body = line.lstrip(" \t")
    lead = line[:len(line) - len(body)]
    return lead.expandtabs(tabsize) + body


def parse(lines, tabsize=DEFAULT_TABSIZE):
    """Collect the indentation changes in *lines*.

    lines   -- sequence of source lines, each including its line ending
    tabsize -- number of columns between tab stops in the indentation

    Returns a list of (token_name, line_number, indent_level, text) tuples,
    one per INDENT or DEDENT token, where indent_level is the nesting depth
    after the token and text is the stripped source line tokenize reported.
    """
    # Expanding the indent of every physical line also touches continuation
    # lines inside multi-line strings; that changes only the string token's
    # text, never the INDENT/DEDENT stream reported here.
    feed = iter([expand_indent(line, tabsize) for line in lines])

    def readline():
        # tokenize expects an empty string once the input is exhausted.
        return next(feed, "")

    events = []
    indent_lvl = 0
    for tok_type, _text, start, _end, line in tokenize.generate_tokens(readline):
        if tok_type == tokenize.INDENT:
            indent_lvl += 1
        elif tok_type == tokenize.DEDENT:
            indent_lvl -= 1
        else:
            continue
        events.append(
            (tokenize.tok_name[tok_type], start[0], indent_lvl, line.strip())
        )
    return events


def main(argv):
    """Print the indentation changes of the file named in *argv*.

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s FILE [TABSIZE]\n" % argv[0])
        return 2
    tabsize = int(argv[2]) if len(argv) > 2 else DEFAULT_TABSIZE
    if tabsize < 1:
        sys.stderr.write("TABSIZE must be a positive integer\n")
        return 2
    with open(argv[1]) as source:
        lines = source.readlines()
    for name, line_number, indent_lvl, text in parse(lines, tabsize):
        print("token=%s, line_number=%i, indent_lvl=%i %s"
              % (name, line_number, indent_lvl, text))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
$ ./sample.py tabspacing_4.py
token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
#indent is 1 tab
token=DEDENT, line_number=4, indent_lvl=0 def __init__(self, msg):
#indent is 4 spaces <-- PROBLEM HERE
token=INDENT, line_number=5, indent_lvl=1 self.msg = msg
#indent is 2 tabs
token=DEDENT, line_number=8, indent_lvl=0
$ ./sample.py tabspacing_8A.py
token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
#indent is 1 tab
token=INDENT, line_number=5, indent_lvl=2 self.msg = msg
#indent is 1 tab + 4 spaces
token=DEDENT, line_number=8, indent_lvl=1
token=DEDENT, line_number=8, indent_lvl=0
$ ./sample.py tabspacing_8B.py
token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
#indent is 1 tab
token=INDENT, line_number=5, indent_lvl=2 self.msg = msg
#indent is 1 tab + 4 spaces
token=DEDENT, line_number=8, indent_lvl=1
token=DEDENT, line_number=8, indent_lvl=0
indents and need some direction. It seems to depend on whether tabsize is
set to 4 in the editor and whether space and tab indents are mixed on
consecutive lines. It works fine when the editor's tabsize is 8, regardless
of whether indents are mixed.
Below are the layouts of the 3 test files, the sample code, and the output
I get.
Any help on how to detect this correctly would be appreciated.
# nano -T4 tabspacing_4.py
class Test:
"""triple quote""" #indent is 1 tab
def __init__(self, msg): #indent is 4 spaces <<
this gets reported as a dedent when there is no change in indent level
self.msg = msg #indent is 2 tabs
#nano -T8 tabspacing_8A.py
class Test:
"""triple quote""" #indent is 1 tab
def __init__(self, msg): #indent is 8 spaces << no
indent change reported
self.msg = msg #indent is 1 tab + 4 spaces
#nano -T8 tabspacing_8B.py
class Test:
"""triple quote""" #indent is 1 tab
def __init__(self, msg): #indent is 1 tab <<
no indent change reported
self.msg = msg #indent is 1 tab + 4 spaces
My script
#!/usr/bin/env python
"""Report every INDENT/DEDENT token found in a Python source file.

Usage: sample.py FILE [TABSIZE]

The tokenize module always treats a tab as advancing to the next multiple
of 8 columns.  A file written in an editor whose tab size is 4, with tab
and space indents mixed on consecutive lines, therefore produces indent
changes that are not visible in the editor.  Passing the editor's tab size
as TABSIZE expands the leading tabs before tokenizing so the reported
levels match what the editor shows.  TABSIZE defaults to 8, which gives
the same columns tokenize computes on its own.
"""
import sys
import tokenize

DEFAULT_TABSIZE = 8


def expand_indent(line, tabsize):
    """Return *line* with tabs in its leading whitespace expanded.

    Only the indentation is rewritten; tabs after the first non-blank
    character are left untouched.
    """
    body = line.lstrip(" \t")
    lead = line[:len(line) - len(body)]
    return lead.expandtabs(tabsize) + body


def parse(lines, tabsize=DEFAULT_TABSIZE):
    """Collect the indentation changes in *lines*.

    lines   -- sequence of source lines, each including its line ending
    tabsize -- number of columns between tab stops in the indentation

    Returns a list of (token_name, line_number, indent_level, text) tuples,
    one per INDENT or DEDENT token, where indent_level is the nesting depth
    after the token and text is the stripped source line tokenize reported.
    """
    # Expanding the indent of every physical line also touches continuation
    # lines inside multi-line strings; that changes only the string token's
    # text, never the INDENT/DEDENT stream reported here.
    feed = iter([expand_indent(line, tabsize) for line in lines])

    def readline():
        # tokenize expects an empty string once the input is exhausted.
        return next(feed, "")

    events = []
    indent_lvl = 0
    for tok_type, _text, start, _end, line in tokenize.generate_tokens(readline):
        if tok_type == tokenize.INDENT:
            indent_lvl += 1
        elif tok_type == tokenize.DEDENT:
            indent_lvl -= 1
        else:
            continue
        events.append(
            (tokenize.tok_name[tok_type], start[0], indent_lvl, line.strip())
        )
    return events


def main(argv):
    """Print the indentation changes of the file named in *argv*.

    Returns the process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s FILE [TABSIZE]\n" % argv[0])
        return 2
    tabsize = int(argv[2]) if len(argv) > 2 else DEFAULT_TABSIZE
    if tabsize < 1:
        sys.stderr.write("TABSIZE must be a positive integer\n")
        return 2
    with open(argv[1]) as source:
        lines = source.readlines()
    for name, line_number, indent_lvl, text in parse(lines, tabsize):
        print("token=%s, line_number=%i, indent_lvl=%i %s"
              % (name, line_number, indent_lvl, text))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
$ ./sample.py tabspacing_4.py
token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
#indent is 1 tab
token=DEDENT, line_number=4, indent_lvl=0 def __init__(self, msg):
#indent is 4 spaces <-- PROBLEM HERE
token=INDENT, line_number=5, indent_lvl=1 self.msg = msg
#indent is 2 tabs
token=DEDENT, line_number=8, indent_lvl=0
$ ./sample.py tabspacing_8A.py
token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
#indent is 1 tab
token=INDENT, line_number=5, indent_lvl=2 self.msg = msg
#indent is 1 tab + 4 spaces
token=DEDENT, line_number=8, indent_lvl=1
token=DEDENT, line_number=8, indent_lvl=0
$ ./sample.py tabspacing_8B.py
token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
#indent is 1 tab
token=INDENT, line_number=5, indent_lvl=2 self.msg = msg
#indent is 1 tab + 4 spaces
token=DEDENT, line_number=8, indent_lvl=1
token=DEDENT, line_number=8, indent_lvl=0