.py to sqlite translator [1 of 2]

S

smitty1e

Disclaimer(s): the author is nobody's pythonista. This could probably
be done more elegantly.
The driver for the effort is to get PyMacs to work with new-style
classes.
This rendering stage stands alone, and might be used for other
purposes.
A subsequent post will show using the resulting file to produce (I
think valid) .el trampoline
signatures for PyMacs.
If nothing else, it shows some python internals in an interesting way.
Tested against version 2.5.1
Maybe "lumberjack.py" would be a better name, since "It cuts down
trees, goes real slow, and uses disk galore. Wishes it'd been
webfoot[1], just like its dear author".
Cheers,
Chris

[1] Author was born in Oregon.

#A sample file:
class sample( object ):
"""fairly trivial sample class for demonstration purposes.
"""
def __init__( self
, some_string ):
self.hold_it = some_string

def show( self ):
print self.hold_it

#Invocation:
# ./pysqlrender.py -f sample.py -o output

#Script:
#!/usr/bin/python

"""Script to dump the parse tree of an input file to a SQLite
database.
"""

from optparse import OptionParser
import os
import parser
import pprint
import re
import sqlite3
import symbol
import token
import types

from types import ListType \
, TupleType

target_table = """CREATE TABLE tbl_parse_tree (
parse_tree_id INTEGER PRIMARY KEY
AUTOINCREMENT
, parse_tree_symbol_id
, parse_tree_indent
, parse_tree_value );"""

target_insert = """INSERT INTO tbl_parse_tree (
parse_tree_symbol_id
, parse_tree_indent
, parse_tree_value )
VALUES (%s, %s, '%s' );"""

symbol_table = """CREATE TABLE tlp_parse_tree_symbol (
parse_tree_symbol_id INTEGER PRIMARY KEY
, parse_tree_symbol_val );"""
symbol_insert = """INSERT INTO tlp_parse_tree_symbol (
parse_tree_symbol_id
, parse_tree_symbol_val )
VALUES ( %s, '%s' );"""

class symbol_manager( object ):
""" Class to merge symbols and tokens for ease of use.
"""
def __init__( self
, c ):
for k in symbol.sym_name:
sql = symbol_insert % ( k, symbol.sym_name[k] )
try:
c.execute( sql )
except sqlite3.IntegrityError:
pass
for k in token.tok_name:
sql = symbol_insert % ( k, token.tok_name[k] )
try:
c.execute( sql )
except sqlite3.IntegrityError:
pass

def get_symbol( self
, key ):
ret = -1
if symbol.sym_name.has_key(key): ret = symbol.sym_name[key]
elif token.tok_name.has_key(key) : ret = token.tok_name[ key]
return ret

def recurse_it( self, tester ):
"""Check to see if dump_tup should recurse
"""
if self.get_symbol(tester) > 0:
return True
return False

class stocker( object ):
"""Remembers the depth of the tree and effects the INSERTs
into the output file.
"""
def __init__( self ):
self.cur_indent = 0

def do_symbol( self
, c
, symbol_value
, val = "" ):
"""Stuff something from the parse tree into the database
table.
"""
if symbol_value==5: self.cur_indent += 1
elif symbol_value==6: self.cur_indent -= 1

try:
sql = target_insert \
% ( symbol_value
, self.cur_indent
, re.sub( "'", "`", str(val) ))
c.execute( sql )
except AttributeError:
print "connection bad in lexer"
except sqlite3.OperationalError:
print "suckage at indent of %s for %s" \
% (self.cur_indent, sql)

def dump_tup( tup
, sym
, c
, stok ):
"""Recursive function to descend TUP and analyze its elements.
tup parse tree of a file, rendered as a tuple
sym dictionary rendered from symbol module
c live database cursor
stok output object effect token storage
"""
for node in tup:
typ = type( node )
r = getattr( typ
, "__repr__"
, None )

if (issubclass(typ, tuple) and r is tuple.__repr__):

if token.tok_name.has_key( node[0] ):
stok.do_symbol( c
, node[0]
, node[1] )
elif sym.recurse_it( node[0] ):
stok.do_symbol( c
, node[0]
, '__py__' ) #If you say node[1] here,
# the sqlite file is fat
# and instructive
for node2 in node[1:]:
dump_tup( node2
, sym
, c
, stok )
else:
stok.do_symbol( c
, node[0]
, node[1] )
dump_tup( node[1]
, sym
, c
, stok )
else:
stok.do_symbol( c
, 0
, node )


def convert_python_source_tree_to_table( file_name
, target_name ):
"""Retrieve information from the parse tree of a source file.
Create an output database file in sqlite.
Make a table in there, and then procede to stuff the flattened
input parse tree into it.

file_name Name of the file to read Python source code from.
target_name Name for the sqlite database
"""
x = open( file_name ).readlines()
y = []
[y.append( line.replace("\r\n","") ) for line in x]

ast = parser.suite( "\n".join(y) )
conn = sqlite3.connect( target_name )
conn.isolation_level = None
c = conn.cursor()
c.execute( target_table )
c.execute( symbol_table )
sym = symbol_manager( c )
stok = stocker()

#pprint.pprint( ast.totuple() )
dump_tup( ast.totuple()
, sym
, c
, stok )

def main():
usage = "usage: %prog [options] arg"
parser = OptionParser(usage)
parser.add_option("-f", "--file", dest="filename"
, action="store", type="string"
, help ="read python source from FILENAME")
#TODO: test for existence of output file, eject if exists
parser.add_option("-o", "--output",dest="output"
, action="store", type="string"
, help ="name of sqlite output file")
(options, args) = parser.parse_args()

convert_python_source_tree_to_table( options.filename
, options.output )

if __name__ == "__main__":
main()
 
G

Guilherme Polo

2007/10/26 said:
Disclaimer(s): the author is nobody's pythonista. This could probably
be done more elegantly.
The driver for the effort is to get PyMacs to work with new-style
classes.
This rendering stage stands alone, and might be used for other
purposes.
A subsequent post will show using the resulting file to produce (I
think valid) .el trampoline
signatures for PyMacs.
If nothing else, it shows some python internals in an interesting way.
Tested against version 2.5.1
Maybe "lumberjack.py" would be a better name, since "It cuts down
trees, goes real slow, and uses disk galore. Wishes it'd been
webfoot[1], just like its dear author".
Cheers,
Chris

[1] Author was born in Oregon.

#A sample file:
class sample( object ):
"""fairly trivial sample class for demonstration purposes.
"""
def __init__( self
, some_string ):
self.hold_it = some_string

def show( self ):
print self.hold_it

#Invocation:
# ./pysqlrender.py -f sample.py -o output

#Script:
#!/usr/bin/python

"""Script to dump the parse tree of an input file to a SQLite
database.
"""

from optparse import OptionParser
import os
import parser
import pprint
import re
import sqlite3
import symbol
import token
import types

from types import ListType \
, TupleType

target_table = """CREATE TABLE tbl_parse_tree (
parse_tree_id INTEGER PRIMARY KEY
AUTOINCREMENT
, parse_tree_symbol_id
, parse_tree_indent
, parse_tree_value );"""

target_insert = """INSERT INTO tbl_parse_tree (
parse_tree_symbol_id
, parse_tree_indent
, parse_tree_value )
VALUES (%s, %s, '%s' );"""

symbol_table = """CREATE TABLE tlp_parse_tree_symbol (
parse_tree_symbol_id INTEGER PRIMARY KEY
, parse_tree_symbol_val );"""
symbol_insert = """INSERT INTO tlp_parse_tree_symbol (
parse_tree_symbol_id
, parse_tree_symbol_val )
VALUES ( %s, '%s' );"""

class symbol_manager( object ):
""" Class to merge symbols and tokens for ease of use.
"""
def __init__( self
, c ):
for k in symbol.sym_name:
sql = symbol_insert % ( k, symbol.sym_name[k] )
try:
c.execute( sql )
except sqlite3.IntegrityError:
pass
for k in token.tok_name:
sql = symbol_insert % ( k, token.tok_name[k] )
try:
c.execute( sql )
except sqlite3.IntegrityError:
pass

def get_symbol( self
, key ):
ret = -1
if symbol.sym_name.has_key(key): ret = symbol.sym_name[key]
elif token.tok_name.has_key(key) : ret = token.tok_name[ key]
return ret

def recurse_it( self, tester ):
"""Check to see if dump_tup should recurse
"""
if self.get_symbol(tester) > 0:
return True
return False

class stocker( object ):
"""Remembers the depth of the tree and effects the INSERTs
into the output file.
"""
def __init__( self ):
self.cur_indent = 0

def do_symbol( self
, c
, symbol_value
, val = "" ):
"""Stuff something from the parse tree into the database
table.
"""
if symbol_value==5: self.cur_indent += 1
elif symbol_value==6: self.cur_indent -= 1

try:
sql = target_insert \
% ( symbol_value
, self.cur_indent
, re.sub( "'", "`", str(val) ))
c.execute( sql )
except AttributeError:
print "connection bad in lexer"
except sqlite3.OperationalError:
print "suckage at indent of %s for %s" \
% (self.cur_indent, sql)

def dump_tup( tup
, sym
, c
, stok ):
"""Recursive function to descend TUP and analyze its elements.
tup parse tree of a file, rendered as a tuple
sym dictionary rendered from symbol module
c live database cursor
stok output object effect token storage
"""
for node in tup:
typ = type( node )
r = getattr( typ
, "__repr__"
, None )

if (issubclass(typ, tuple) and r is tuple.__repr__):

if token.tok_name.has_key( node[0] ):
stok.do_symbol( c
, node[0]
, node[1] )
elif sym.recurse_it( node[0] ):
stok.do_symbol( c
, node[0]
, '__py__' ) #If you say node[1] here,
# the sqlite file is fat
# and instructive
for node2 in node[1:]:
dump_tup( node2
, sym
, c
, stok )
else:
stok.do_symbol( c
, node[0]
, node[1] )
dump_tup( node[1]
, sym
, c
, stok )
else:
stok.do_symbol( c
, 0
, node )


def convert_python_source_tree_to_table( file_name
, target_name ):
"""Retrieve information from the parse tree of a source file.
Create an output database file in sqlite.
Make a table in there, and then procede to stuff the flattened
input parse tree into it.

file_name Name of the file to read Python source code from.
target_name Name for the sqlite database
"""
x = open( file_name ).readlines()
y = []
[y.append( line.replace("\r\n","") ) for line in x]

ast = parser.suite( "\n".join(y) )
conn = sqlite3.connect( target_name )
conn.isolation_level = None
c = conn.cursor()
c.execute( target_table )
c.execute( symbol_table )
sym = symbol_manager( c )
stok = stocker()

#pprint.pprint( ast.totuple() )
dump_tup( ast.totuple()
, sym
, c
, stok )

def main():
usage = "usage: %prog [options] arg"
parser = OptionParser(usage)
parser.add_option("-f", "--file", dest="filename"
, action="store", type="string"
, help ="read python source from FILENAME")
#TODO: test for existence of output file, eject if exists
parser.add_option("-o", "--output",dest="output"
, action="store", type="string"
, help ="name of sqlite output file")
(options, args) = parser.parse_args()

convert_python_source_tree_to_table( options.filename
, options.output )

if __name__ == "__main__":
main()

Hello, I took a look at that script and I have made some changes.
Before posting my version let me comment on somethings..

First, you probably noticed that it gets slow as you run the script
with "larger" files. All the time "wasted" on this is because you set
isolation level to None, so there are a lot of commits and commits
make this slow. Removing that isolation level and doing only one
commit after dump_tup cuts basically all time wasted;

Second, don't use "%s" to insert values into your sql query string
please. For sqlite you should substitute those by "?"s and pass a
tuple to it;

Third, don't use "yourdict.has_key(key)", use "key in yourdict". I
have read the Disclamer at top, but maybe you wanted to hear
something;

Fourth, It could be the email client but did you use 3 spaces for indent ? :/ ;

Fifth, other observations are left to the reader as exercise

My version:
#!/usr/bin/env python

"""Script to dump the parse tree of an input file to a SQLite
database.
"""

import token
import parser
import symbol
import sqlite3
from optparse import OptionParser

TARGET_TABLE = """CREATE TABLE tbl_parse_tree (
parse_tree_id INTEGER PRIMARY KEY AUTOINCREMENT,
parse_tree_symbol_id,
parse_tree_indent,
parse_tree_value)"""

TARGET_INSERT = """INSERT INTO tbl_parse_tree
(parse_tree_symbol_id, parse_tree_indent, parse_tree_value)
VALUES (?, ?, ?)"""

SYMBOL_TABLE = """CREATE TABLE tlp_parse_tree_symbol (
parse_tree_symbol_id INTEGER PRIMARY KEY,
parse_tree_symbol_val)"""

SYMBOL_INSERT = """INSERT INTO tlp_parse_tree_symbol
(parse_tree_symbol_id, parse_tree_symbol_val) VALUES (?, ?)"""

class SymbolManager(object):
"""Class to merge symbols and tokens for ease of use."""

def __init__(self, c):
self.to_merge = token.tok_name.copy()
self.to_merge.update(symbol.sym_name)

for k, v in self.to_merge.iteritems():
c.execute(SYMBOL_INSERT, (k, v))

def get_symbol(self, key):
return self.to_merge[key] if key in self.to_merge else -1

def recurse_it(self, tester):
"""Check to see if dump_tup should recurse"""
if self.get_symbol(tester) > 0:
return True

return False


class Stocker(object):
"""Remembers the depth of the tree and effects the INSERTs
into the output file.
"""

def __init__(self):
self.cur_indent = 0

def do_symbol(self, c, symbol_value, val=""):
"""Stuff something from the parse tree into the database table."""
if symbol_value == 5:
self.cur_indent += 1

elif symbol_value==6:
self.cur_indent -= 1

c.execute(TARGET_INSERT, (symbol_value, self.cur_indent,
str(val).replace("'", "`")))


def dump_tup(tup, sym, c, stok):
"""Recursive function to descend TUP and analyze its elements.
tup parse tree of a file, rendered as a tuple
sym dictionary rendered from symbol module
c live database cursor
stok output object effect token storage
"""
for node in tup:
typ = type(node)
r = getattr(typ, "__repr__", None)

if (issubclass(typ, tuple) and r is tuple.__repr__):
if node[0] in token.tok_name:
stok.do_symbol(c, node[0], node[1])
elif sym.recurse_it(node[0]):
#If you say node[1] here, the sqlite file is fat
# and instructive
stok.do_symbol(c, node[0], '__py__' )
for node2 in node[1:]:
dump_tup(node2, sym, c, stok)
else:
stok.do_symbol(c, node[0], node[1])
dump_tup(node[1], sym, c, stok)

else:
stok.do_symbol(c, 0, node)

def python_source_tree_to_db(file_name, target_name):
"""Retrieve information from the parse tree of a source file.
Create an output database file in sqlite.
Make a table in there, and then procede to stuff the flattened
input parse tree into it.

file_name Name of the file to read Python source code from.
target_name Name for the sqlite database
"""
conn = sqlite3.connect(target_name)
c = conn.cursor()
c.execute(TARGET_TABLE)
c.execute(SYMBOL_TABLE)

ast = parser.suite(''.join(open(file_name, 'rU').readlines()))
sym = SymbolManager(c)
stok = Stocker()

#pprint.pprint(ast.totuple())
dump_tup(ast.totuple(), sym, c, stok)
conn.commit()

def main():
oparser = OptionParser("usage: %prog [options] arg")
oparser.add_option("-f", "--file", dest="filename",
help="read python source from FILENAME")
oparser.add_option("-o", "--output", dest="output",
help="name of sqlite output file")
(options, _) = oparser.parse_args()

if not options.filename or not options.output:
oparser.print_help()
else:
try:
open(options.output)
print "Output file exists, chose another one."
except IOError:
python_source_tree_to_db(options.filename, options.output)


if __name__ == "__main__":
main()
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Similar Threads


Members online

Forum statistics

Threads
473,755
Messages
2,569,537
Members
45,020
Latest member
GenesisGai

Latest Threads

Top