J
Jabba Laci
Hi,
I have a simple PyQt application that creates a webkit instance to
scrape AJAX web pages. It works well but I can't call it twice. I
think the application is not closed correctly, that's why the 2nd call
fails. Here is the code below. I also put it on pastebin:
http://pastebin.com/gkgSSJHY .
The question is: how to call this code several times within a script.
Thanks,
Laszlo
=====
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtCore import QUrl
class SimpleWebkit(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self.save)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def save(self):
self.html = self.mainFrame().toHtml()
self.app.quit()
def get_html(url):
s = SimpleWebkit(url)
return str(s.html) # QString to string !
#####
if __name__ == "__main__":
url = 'http://simile.mit.edu/crowbar/test.html'
print get_html(url) # OK
print '=========='
print get_html(url) # problem here, never called
I have a simple PyQt application that creates a webkit instance to
scrape AJAX web pages. It works well but I can't call it twice. I
think the application is not closed correctly, that's why the 2nd call
fails. Here is the code below. I also put it on pastebin:
http://pastebin.com/gkgSSJHY .
The question is: how to call this code several times within a script.
Thanks,
Laszlo
=====
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtCore import QUrl
class SimpleWebkit(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self.save)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def save(self):
self.html = self.mainFrame().toHtml()
self.app.quit()
def get_html(url):
s = SimpleWebkit(url)
return str(s.html) # QString to string !
#####
if __name__ == "__main__":
url = 'http://simile.mit.edu/crowbar/test.html'
print get_html(url) # OK
print '=========='
print get_html(url) # problem here, never called