M
Matt
I have a parser program which queries an online shopping comparison web
page and extracts the information needed. I am trying to run this
program with different search terms, which are created by splitting an
entered sentence into words, so each word is sent separately. However,
the outputs (text files) are the same for each word, even though the
correct term and output file seem to be passed. I suspect the connection
is not being closed each time, but I am not sure why this is happening.
If I create an identical copy of the program and run that after the
first one, it works, but this is not an appropriate solution.
Any help would be much appreciated. Here is some of my code; if more
is required, I will post it.
To run the program:
// Split the sentence on spaces and run the parser once per word, writing each
// word's results to its own numbered file (C:/1.txt, C:/2.txt, ...).
StringTokenizer t = new StringTokenizer("red green yellow", " ");
int c = 0;
Parser1 p = new Parser1();
while (t.hasMoreTokens()) {
    c++;
    String tok = t.nextToken();
    File tem = new File("C:/" + c + ".txt");
    // Exactly one call per term: repeating the call with the same arguments
    // only fetches the page again and rewrites the same output file.
    p.mainprog(tok, tem);
}
The parser:
import javax.swing.text.html.parser.*;
import javax.swing.text.html.*;
import javax.swing.text.*;
import java.awt.*;
import java.util.*;
import javax.swing.*;
import java.io.*;
import java.net.*;
/**
 * HTML parser callback plus a static driver ({@link #mainprog}) that downloads
 * the page for one search term, parses it, and writes the collected strings
 * to a text file.
 *
 * NOTE(review): mainprog is static and reads {@code vect}, {@code url} and
 * {@code MainUrl}, so those must be static members declared in the elided
 * section below. Nothing visible here resets {@code vect} (or any other
 * static state the handlers may use) between calls; confirm whether that
 * state has to be cleared per search term.
 */
public class Parser1 extends HTMLEditorKit.ParserCallback {
variable declarations

    /** Parser hook invoked for every opening tag. */
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
...methods
    }

    /** Parser hook invoked for every run of text between tags. */
    public void handleText(char[] data, int pos) {
...methods
    }

    // NOTE(review): HTMLEditorKit.ParserCallback declares no handleTitleTag,
    // so the parser never calls this method; confirm it is still needed.
    public void handleTitleTag(HTML.Tag t, char[] data) {
    }

    // NOTE(review): HTMLEditorKit.ParserCallback declares no handleEmptyTag,
    // so the parser never calls this method; confirm it is still needed.
    public void handleEmptyTag(HTML.Tag t, char[] data) {
    }

    /** Parser hook invoked for tags that have no separate end tag. */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
...methods
    }

    /**
     * Downloads {@code MainUrl + term}, parses the response with a new
     * Parser1 callback, then writes every element of {@code vect} to
     * {@code file}, one element per line with no trailing newline.
     * Failures are printed and not rethrown.
     *
     * @param term search term appended to MainUrl
     * @param file destination file; it is overwritten on every call
     */
    static void mainprog(String term, File file) {
....proxy and authentication methods
        Authenticator.setDefault(new MyAuthenticator());
        HTMLEditorKit editorKit = new HTMLEditorKit();
        HTMLDocument HTMLDoc;
        Reader HTMLReader = null;
        InputStream myInStream = null;
        try {
            String fullurl = MainUrl + term;
            url = new URL(fullurl);
            myInStream = url.openConnection().getInputStream();
            HTMLReader = new InputStreamReader(myInStream);
            // NOTE(review): this document is never handed to the parser below;
            // the charset directive is already ignored through the third
            // argument of parse().
            HTMLDoc = (HTMLDocument) editorKit.createDefaultDocument();
            HTMLDoc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
            ParserDelegator parser = new ParserDelegator();
            HTMLEditorKit.ParserCallback callback = new Parser1();
            parser.parse(HTMLReader, callback, true);
            callback.flush();
        }
        catch (IOException IOE) {
            IOE.printStackTrace();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Release the connection even when fetching or parsing failed.
            // Close errors are handled here so they cannot reach the outer
            // catch at the end of this method.
            if (HTMLReader != null) {
                try {
                    HTMLReader.close();
                }
                catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
            if (myInStream != null) {
                try {
                    myInStream.close();
                }
                catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(file));
            int count = vect.size();
            for (int i = 0; i < count; i++) {
                bw.write((String) vect.elementAt(i));
                // Decide by position. Comparing the element with lastElement()
                // by reference dropped the newline after any earlier element
                // that happened to be the same object as the last one.
                if (i < count - 1) {
                    bw.newLine();
                }
            }
            bw.flush();
        }
        catch (IOException IOE) {
            IOE.printStackTrace();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Closing the BufferedWriter also closes the wrapped FileWriter.
            if (bw != null) {
                try {
                    bw.close();
                }
                catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    // NOTE(review): no matching try is visible for this catch; presumably it
    // is opened inside the elided proxy/authentication section above.
    } catch (IOException IOE) {
        System.out.println("User options not found.");
    }
    }
}
page and extracts the information needed. I am trying to run this
program with different search terms, which are created by splitting an
entered sentence into words, so each word is sent separately. However,
the outputs (text files) are the same for each word, even though the
correct term and output file seem to be passed. I suspect the connection
is not being closed each time, but I am not sure why this is happening.
If I create an identical copy of the program and run that after the
first one, it works, but this is not an appropriate solution.
Any help would be much appreciated. Here is some of my code; if more
is required, I will post it.
To run the program:
// Split the sentence on spaces and run the parser once per word, writing each
// word's results to its own numbered file (C:/1.txt, C:/2.txt, ...).
StringTokenizer t = new StringTokenizer("red green yellow", " ");
int c = 0;
Parser1 p = new Parser1();
while (t.hasMoreTokens()) {
    c++;
    String tok = t.nextToken();
    File tem = new File("C:/" + c + ".txt");
    // Exactly one call per term: repeating the call with the same arguments
    // only fetches the page again and rewrites the same output file.
    p.mainprog(tok, tem);
}
The parser:
import javax.swing.text.html.parser.*;
import javax.swing.text.html.*;
import javax.swing.text.*;
import java.awt.*;
import java.util.*;
import javax.swing.*;
import java.io.*;
import java.net.*;
/**
 * HTML parser callback plus a static driver ({@link #mainprog}) that downloads
 * the page for one search term, parses it, and writes the collected strings
 * to a text file.
 *
 * NOTE(review): mainprog is static and reads {@code vect}, {@code url} and
 * {@code MainUrl}, so those must be static members declared in the elided
 * section below. Nothing visible here resets {@code vect} (or any other
 * static state the handlers may use) between calls; confirm whether that
 * state has to be cleared per search term.
 */
public class Parser1 extends HTMLEditorKit.ParserCallback {
variable declarations

    /** Parser hook invoked for every opening tag. */
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
...methods
    }

    /** Parser hook invoked for every run of text between tags. */
    public void handleText(char[] data, int pos) {
...methods
    }

    // NOTE(review): HTMLEditorKit.ParserCallback declares no handleTitleTag,
    // so the parser never calls this method; confirm it is still needed.
    public void handleTitleTag(HTML.Tag t, char[] data) {
    }

    // NOTE(review): HTMLEditorKit.ParserCallback declares no handleEmptyTag,
    // so the parser never calls this method; confirm it is still needed.
    public void handleEmptyTag(HTML.Tag t, char[] data) {
    }

    /** Parser hook invoked for tags that have no separate end tag. */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
...methods
    }

    /**
     * Downloads {@code MainUrl + term}, parses the response with a new
     * Parser1 callback, then writes every element of {@code vect} to
     * {@code file}, one element per line with no trailing newline.
     * Failures are printed and not rethrown.
     *
     * @param term search term appended to MainUrl
     * @param file destination file; it is overwritten on every call
     */
    static void mainprog(String term, File file) {
....proxy and authentication methods
        Authenticator.setDefault(new MyAuthenticator());
        HTMLEditorKit editorKit = new HTMLEditorKit();
        HTMLDocument HTMLDoc;
        Reader HTMLReader = null;
        InputStream myInStream = null;
        try {
            String fullurl = MainUrl + term;
            url = new URL(fullurl);
            myInStream = url.openConnection().getInputStream();
            HTMLReader = new InputStreamReader(myInStream);
            // NOTE(review): this document is never handed to the parser below;
            // the charset directive is already ignored through the third
            // argument of parse().
            HTMLDoc = (HTMLDocument) editorKit.createDefaultDocument();
            HTMLDoc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);
            ParserDelegator parser = new ParserDelegator();
            HTMLEditorKit.ParserCallback callback = new Parser1();
            parser.parse(HTMLReader, callback, true);
            callback.flush();
        }
        catch (IOException IOE) {
            IOE.printStackTrace();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Release the connection even when fetching or parsing failed.
            // Close errors are handled here so they cannot reach the outer
            // catch at the end of this method.
            if (HTMLReader != null) {
                try {
                    HTMLReader.close();
                }
                catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
            if (myInStream != null) {
                try {
                    myInStream.close();
                }
                catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(file));
            int count = vect.size();
            for (int i = 0; i < count; i++) {
                bw.write((String) vect.elementAt(i));
                // Decide by position. Comparing the element with lastElement()
                // by reference dropped the newline after any earlier element
                // that happened to be the same object as the last one.
                if (i < count - 1) {
                    bw.newLine();
                }
            }
            bw.flush();
        }
        catch (IOException IOE) {
            IOE.printStackTrace();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Closing the BufferedWriter also closes the wrapped FileWriter.
            if (bw != null) {
                try {
                    bw.close();
                }
                catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    // NOTE(review): no matching try is visible for this catch; presumably it
    // is opened inside the elided proxy/authentication section above.
    } catch (IOException IOE) {
        System.out.println("User options not found.");
    }
    }
}