Raymond Hettinger said:
The source for the tokenize module covers all these bases.
# tokenize text replace
import sys, traceback
import string, cStringIO
import token, tokenize
######################################################################
class Parser:
    """Python source code tokenizing text replacer."""

    def __init__(self, raw, out=sys.stdout):
        '''Store the source text & set some flags.'''
        self.raw = string.strip(string.expandtabs(raw))
        self.out = out

    def format(self, search='', replace='',
               replacetokentype=token.NAME):
        '''Parse and send text.'''
        # Store line offsets in self.lines
        self.lines = [0, 0]
        pos = 0
        self.temp = cStringIO.StringIO()
        self.searchtext = search
        self.replacetext = replace
        self.replacetokentype = replacetokentype
        # Gather the starting offset of each line
        while 1:
            pos = string.find(self.raw, '\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))
        # Wrap the text in a file-like object
        self.pos = 0
        text = cStringIO.StringIO(self.raw)
        # Parse the source.
        # tokenize calls self.__call__ for each token till done.
        try:
            tokenize.tokenize(text.readline, self)
        except tokenize.TokenError:
            traceback.print_exc()

    def __call__(self, toktype, toktext,
                 (srow, scol), (erow, ecol), line):
        '''Token handler.'''
        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)
        # handle newlines
        if toktype in [token.NEWLINE, tokenize.NL]:
            self.out.write('\n')
            return
        # send the original whitespace, if needed
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])
        # skip indenting tokens
        if toktype in [token.INDENT, token.DEDENT]:
            self.pos = newpos
            return
        # search for matches to our searchtext;
        # customize this test for your exact needs
        if (toktype == self.replacetokentype and
                toktext == self.searchtext):
            toktext = self.replacetext
        # write it out
        self.out.write(toktext)
        return
######################################################################
# just an example
def Main():
    # Rewrite the file named on the command line, replacing
    # the name 'tokenize' with 'MyNewName'.
    if len(sys.argv) > 1:
        filein = open(sys.argv[1]).read()
        Parser(filein, out=sys.stdout).format('tokenize', 'MyNewName')
######################################################################
if __name__ == '__main__':
    Main()
# end of code
This is an example of how to use tokenize to replace names that
match a search string. If you want to replace strings rather than
names, pass token.STRING instead of token.NAME as the
replacetokentype, and so on; a short usage sketch follows below.
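
A minimal usage sketch, assuming the code above is saved as
replacer.py (that file name, somefile.py, and the search strings
are only illustrative):

import token
from replacer import Parser

src = open('somefile.py').read()
# replace NAME tokens spelled 'tokenize'
Parser(src).format('tokenize', 'MyNewName')
# replace STRING tokens instead; note that a STRING token's text
# includes its quote characters, so the search text must include them too
Parser(src).format("'old'", "'new'", token.STRING)

The replaced source is written to sys.stdout by default; pass a
different file-like object as out= to capture it elsewhere.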
HTH,
M.E.Farmer