html parser, some site work only

Discussion in 'Java' started by ABAKUS, Jun 13, 2004.

  1. ABAKUS

    ABAKUS Guest

    this code work only some site, why?
    Have you some ideas how it make?

    import java.io.*;
    import java.util.*;
    import java.net.*;
    import java.io.FileReader;
    import java.io.Reader;
    import java.io.FileNotFoundException;
    import javax.swing.text.html.HTML;
    import javax.swing.text.html.HTMLEditorKit;
    import javax.swing.text.html.HTMLEditorKit.ParserCallback;
    import javax.swing.text.html.parser.ParserDelegator;
    import javax.swing.text.MutableAttributeSet;




    class Meta {

    public static void main(String[] args) throws java.io.FileNotFoundException,
    java.io.IOException {

    Properties props= new Properties(System.getProperties());
    props.put("http.proxySet", "true");
    props.put("http.proxyHost", "192.168.0.1");
    props.put("http.proxyPort", "6588");
    System.setProperties(props);

    HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback ()
    {

    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {

    if (t == HTML.Tag.META)
    {
    System.out.println("META inside");

    String attrib = (String) a.getAttribute (HTML.Attribute.NAME);
    if (attrib != null)
    {
    System.out.println("NAME : "+attrib);
    }
    String content =(String) a.getAttribute(HTML.Attribute.CONTENT);
    if (content != null)
    {
    System.out.println("content : "+content);
    }
    String httpequiv = (String)a.getAttribute(HTML.Attribute.HTTPEQUIV);
    if (httpequiv != null)
    {
    System.out.println("httpequiv : "+httpequiv);
    }
    }
    }
    };
    for (int counter = args.length - 1; counter >= 0; counter--)
    {
    Reader reader = getReader(args[counter]);
    try{
    new ParserDelegator().parse(reader, callback, false);
    }
    catch (Exception e) {
    System.out.println("ERROR IN SITE: "+args[counter]+"Exception"+e);
    }
    }
    }
    static Reader getReader(String uri)
    throws IOException {

    if (uri.startsWith("http:")) {

    URLConnection conn=
    new URL(uri).openConnection();
    return new
    InputStreamReader(conn.getInputStream());

    } else {

    return new FileReader(uri);
    }
    }
    }
     
    ABAKUS, Jun 13, 2004
    #1
    1. Advertisements

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Eitan M
    Replies:
    1
    Views:
    392
    Eitan M
    Jun 10, 2005
  2. ad
    Replies:
    2
    Views:
    848
  3. ZOCOR

    XML Parser VS HTML Parser

    ZOCOR, Oct 3, 2004, in forum: Java
    Replies:
    11
    Views:
    1,052
    Paul King
    Oct 5, 2004
  4. Nebojsa Topolscak
    Replies:
    3
    Views:
    700
    Jon Caldwell
    Jan 14, 2005
  5. Tiddley-Pom
    Replies:
    5
    Views:
    603
    Mark Parnell
    Oct 15, 2003
  6. asd
    Replies:
    1
    Views:
    418
    torakiki
    Dec 7, 2006
  7. raviraj joshi
    Replies:
    0
    Views:
    590
    raviraj joshi
    Jul 4, 2009
  8. Zach Dennis

    HTML-Parser / SGML-Parser

    Zach Dennis, Oct 1, 2003, in forum: Ruby
    Replies:
    5
    Views:
    722
    Bernard Delmée
    Oct 1, 2003
Loading...