HTML parser works only on some sites

Discussion in 'Java' started by ABAKUS, Jun 13, 2004.

  1. ABAKUS

    ABAKUS Guest

    This code works only on some sites. Why?
    Do you have any ideas on how to make it work?

    import java.io.*;
    import java.util.*;
    import java.net.*;
    import java.io.FileReader;
    import java.io.Reader;
    import java.io.FileNotFoundException;
    import javax.swing.text.html.HTML;
    import javax.swing.text.html.HTMLEditorKit;
    import javax.swing.text.html.HTMLEditorKit.ParserCallback;
    import javax.swing.text.html.parser.ParserDelegator;
    import javax.swing.text.MutableAttributeSet;




    /**
     * Command-line tool that prints the META tags found in HTML documents.
     *
     * Each command-line argument is either an http/https URL or a local file
     * path. For every META tag encountered, the tool prints its "name",
     * "content" and "http-equiv" attributes (those that are present) to
     * standard output.
     */
    class Meta {

        /**
         * Parses every document named in {@code args} (processed from the last
         * argument to the first) and prints its META tags.
         *
         * A failure while opening or parsing one document is reported on
         * standard output and does not prevent the remaining documents from
         * being processed.
         *
         * @param args URLs (http/https) or local file paths of HTML documents
         * @throws java.io.FileNotFoundException kept in the signature for
         *         backward compatibility; per-document failures are reported,
         *         not thrown
         * @throws java.io.IOException kept in the signature for backward
         *         compatibility; per-document failures are reported, not thrown
         */
        public static void main(String[] args) throws java.io.FileNotFoundException,
                java.io.IOException {

            // Route HTTP traffic through the fixed proxy. Individual properties
            // are set instead of replacing the whole system Properties object,
            // which would demote every existing property to a hidden default.
            System.setProperty("http.proxySet", "true");
            System.setProperty("http.proxyHost", "192.168.0.1");
            System.setProperty("http.proxyPort", "6588");

            HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {
                public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                    if (t != HTML.Tag.META) {
                        return;
                    }
                    System.out.println("META inside");
                    printAttribute("NAME : ", a.getAttribute(HTML.Attribute.NAME));
                    printAttribute("content : ", a.getAttribute(HTML.Attribute.CONTENT));
                    printAttribute("httpequiv : ", a.getAttribute(HTML.Attribute.HTTPEQUIV));
                }
            };

            for (int counter = args.length - 1; counter >= 0; counter--) {
                Reader reader = null;
                try {
                    // Opening the source is inside the try so that one bad
                    // URL/file does not abort the remaining arguments.
                    reader = getReader(args[counter]);

                    // ignoreCharSet must be true: with false the parser throws
                    // ChangedCharSetException as soon as it meets a
                    // <meta http-equiv="Content-Type" content="...; charset=...">
                    // tag, so any page declaring its charset failed to parse.
                    new ParserDelegator().parse(reader, callback, true);
                } catch (Exception e) {
                    System.out.println("ERROR IN SITE: " + args[counter] + "Exception" + e);
                } finally {
                    if (reader != null) {
                        try {
                            reader.close();
                        } catch (IOException ignored) {
                            // Nothing useful can be done if closing fails.
                        }
                    }
                }
            }
        }

        /**
         * Prints {@code label} followed by {@code value}, or nothing when the
         * attribute is absent.
         *
         * @param label text printed in front of the value
         * @param value attribute value, or {@code null} if the tag lacks it
         */
        private static void printAttribute(String label, Object value) {
            if (value != null) {
                System.out.println(label + value);
            }
        }

        /**
         * Opens a character stream for the given document location.
         *
         * Locations starting with "http:" or "https:" are fetched through a
         * {@link URLConnection}; anything else is treated as a local file path.
         * In both cases the bytes are decoded with the platform default
         * charset. The caller is responsible for closing the returned reader.
         *
         * @param uri an http/https URL or a local file path
         * @return a reader over the document's content
         * @throws IOException if the URL cannot be opened or the file cannot
         *         be found
         */
        static Reader getReader(String uri)
                throws IOException {

            if (uri.startsWith("http:") || uri.startsWith("https:")) {
                URLConnection conn = new URL(uri).openConnection();
                return new InputStreamReader(conn.getInputStream());
            }
            return new FileReader(uri);
        }
    }
     
    ABAKUS, Jun 13, 2004
    #1
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Eitan M
    Replies:
    1
    Views:
    333
    Eitan M
    Jun 10, 2005
  2. ad
    Replies:
    2
    Views:
    750
  3. ZOCOR

    XML Parser VS HTML Parser

    ZOCOR, Oct 3, 2004, in forum: Java
    Replies:
    11
    Views:
    848
    Paul King
    Oct 5, 2004
  4. asd
    Replies:
    1
    Views:
    355
    torakiki
    Dec 7, 2006
  5. Zach Dennis

    HTML-Parser / SGML-Parser

    Zach Dennis, Oct 1, 2003, in forum: Ruby
    Replies:
    5
    Views:
    455
    Bernard Delmée
    Oct 1, 2003
Loading...

Share This Page