I use TK to show some chinese web page, I get nothing,why?

Discussion in 'Perl Misc' started by quakewang@mail.whut.edu.cn, Nov 26, 2006.

  1. Guest

    In the following problem, I use TK to get a page and show it, but

    I get nothing when I use it on a chinese web page, and it can get some
    thing for a english based web page, why?

    I think it may the the "FONT" problem, but I do not not how to
    special a chinese font.

    I do not know how to spell a font name, xxxxx-bold-12, what is
    the
    xxxxx and I have a font file, how to know what is the font name shall I

    use to load it?


    thanks.
    ====================================================



    #!/usr/bin/perl

    use Tk;
    require LWP::UserAgent;
    use HTML::parse;

    %html_action =
    (
    "</TITLE>", \&end_title,
    "<H1>", \&start_heading,
    "</h2>", \&end_heading,
    "<H2>", \&start_heading,
    "</H2>", \&end_heading,
    "<H3>", \&start_heading,
    "</H3>", \&end_heading,
    "<H4>", \&start_heading,
    "</H4>", \&end_heading,
    "<H5>", \&start_heading,
    "</H5>", \&end_heading,
    "<H6>", \&start_heading,
    "</H6>", \&end_heading,
    "<P>", \&paragraph,
    "<BR>", \&line_break,
    "<HR>", \&draw_line,
    "<A>", \&flush_text,
    "</A>", \&end_link,
    "</BODY>", \&line_break,
    );

    $ua = new LWP::UserAgent;
    #$dictionary_url = "http://dict.cn/search/";
    #$dictionary_url = "http://www.perl.com/";
    $dictionary_url = "http://www.baidu.com/";
    #$dictionary_url =
    "http://www.oreilly.com/openbook/webclient/ch07.html";


    $mw = MainWindow->new;
    $mw->title("xword");
    $mw->CmdLine;

    $frame1 = $mw->Frame(-borderwidth => 2, -relief => 'ridge');
    $frame1->pack(-side => 'top', -expand => 'no', -fill => "x");
    $frame2 = $mw->Frame;
    $frame2->pack(-side => 'top', -expand => 'yes', -fill => 'both');
    $frame3 = $mw->Frame;
    $frame3->pack(-side => 'top', -expand => 'no', -fill => 'x');

    $frame1->Label(-text => "Enter Word: ")->pack(-side => "left", -anchor
    => "w");
    $entry = $frame1->Entry(-textvariable => \$word, -width => 40);
    $entry->pack(-side => "left", -anchor => "w", -fill => "x", -expand =>
    "y");

    $bttn = $frame1->Button(-text => "Lookup", -command => sub {
    &do_search(); });
    $bttn->pack(-side => "left", -anchor => "w");

    $entry->bind('<Return>', sub { &do_search(); } );

    $scroll = $frame2->Scrollbar;
    $text = $frame2->Text(-yscrollcommand => ['set', $scroll],
    -wrap => 'word',
    -font => 'ËÎÌå-bold-24',
    -state => 'disabled');
    $scroll->configure(-command => ['yview', $text]);
    $scroll->pack(-side => 'right', -expand => 'no', -fill => 'y');
    $text->pack(-side => 'left', -anchor => 'w',
    -expand => 'yes', -fill => 'both');

    $frame3->Label(-textvariable => \$INFORMATION,
    -justify => 'left')->pack(-side => 'left',
    -expand => 'no',
    -fill => 'x');
    $frame3->Button(-text => "Exit",
    -command => sub{exit} )->pack(-side => 'right',
    -anchor => 'e');


    $text->tag('configure', '</h2>', -font => 'ËÎÌå-bold-24');
    $text->tag('configure', '</H2>', -font => 'ËÎÌå-bold-18');
    $text->tag('configure', '</H3>', -font => 'ËÎÌå-bold-14');
    $text->tag('configure', '</H4>', -font => 'ËÎÌå-bold-12');
    $text->tag('configure', '</H5>', -font => 'ËÎÌå-bold-12');
    $text->tag('configure', '</H6>', -font => 'ËÎÌå-bold-12');


    $entry->focus;
    MainLoop;


    sub do_search {
    my ($url) = @_;

    return if ($word =~ /^\s*$/);

    $url = "$dictionary_url?q=$word" if (! defined $url);
    #$text->insert('end', "\n--------------------------------------\n");



    $INFORMATION = "Connect: $url";

    $text->configure(-cursor=> 'watch');
    $mw->idletasks;

    my $request = new HTTP::Request('GET', $url);

    my $response = $ua->request($request);
    if ($response->is_error) {
    $INFORMATION = "ERROR: Could not retrieve $url";
    } elsif ($response->is_success) {
    my $html = parse_html($response->decoded_content);

    ## Clear out text item
    $text->configure(-state => "normal");

    $text->delete('1.0', 'end');
    $html->traverse(\&display_html);
    $text->configure(-state => "disabled");
    $html_text = "";
    $INFORMATION = "Done";
    }

    $text->configure(-cursor => 'top_left_arrow');
    }


    sub display_html {
    my ($node, $startflag, $depth) = @_;
    my ($tag, $type, $coderef); ## This tag is the HTML tag...

    if (!ref $node) {
    $html_text .= $node;
    } else {
    if ($startflag) {
    $tag = $node->starttag;
    } else {
    $tag = $node->endtag;
    }

    ## Gets rid of any 'extra' stuff in the tag, and saves it
    if ($tag =~ /^(<\w+)\s(.*)>/) {
    $tag = "$1>";
    $extra = $2;
    }

    if (exists $html_action{$tag}) {
    $html_text =~ s/\s+/ /g;
    &{ $html_action{$tag} }($tag, $html_text);
    $html_text = "";
    }
    }
    1;
    }




    sub end_title {
    $mw->title("xword: ". $_[1]);
    }


    sub start_heading {
    &flush_text(@_);
    $text->insert('end', "\n\n");
    }

    sub end_heading {
    $text->insert('end', $_[1], $_[0]);
    $text->insert('end', "\n");
    }

    sub paragraph {
    &flush_text(@_);
    $text->insert('end', "\n\n");
    }

    sub line_break {
    &flush_text(@_);
    $text->insert('end', "\n");
    }

    sub draw_line {
    &flush_text(@_);
    $text->insert('end', "\n--------------------------------------\n");
    }

    sub flush_text {
    $text->insert('end', $_[1]);
    }

    sub end_link {
    ## Don't want to add links to mailto refs.
    if ($extra =~ /HREF\s*=\s*"(.+)"/ && $extra !~ /mailto/) {
    my $site = $1;

    ## The tags must have unique names to allow for a different
    ## binding to each one. (Otherwise we'd just be changing that same
    ## tag binding over and over again.)

    my $newtag = "LINK". $cnt++;

    $text->tag('configure', $newtag, -underline => 'true',
    -foreground => 'blue');
    $text->tag('bind', $newtag, '<Enter>',
    sub { $text->configure(-cursor => 'hand2');
    $INFORMATION = $site; });
    $text->tag('bind', $newtag, '<Leave>',
    sub { $text->configure(-cursor => 'top_left_arrow');
    $INFORMATION = "";});

    $text->tag('bind', $newtag, '<ButtonPress>',
    sub { &do_search($site); });

    $text->insert('end', $_[1], $newtag);
    } else {
    &flush_text(@_);
    }
    }
    , Nov 26, 2006
    #1
    1. Advertising

  2. zentara Guest

    On 25 Nov 2006 18:00:20 -0800, wrote:

    > In the following problem, I use TK to get a page and show it, but
    >
    >I get nothing when I use it on a chinese web page, and it can get some
    >thing for a english based web page, why?


    There is something funny in the way you decide there are no results,
    and when a error occurs. so it shows nothing.
    I tried chopping down your script, so that it just displayed the page
    without any word search or html parsing, so to work out 1 problem
    at a time.
    Your script did not display Chinese characters properly, on my
    machine.

    This is what I did.
    In order to avoid the complexity of your html parsing, (which I think is
    broken), I just display the url, so you can see it works.
    > I think it may the the "FONT" problem, but I do not not how to
    >special a chinese font.


    I commented out all font lines and let Tk do it.
    I hope it helps you out. I just used a simple parser
    to extract text.

    #!/usr/bin/perl
    use warnings;
    use strict;
    use Tk;
    require LWP::UserAgent;
    use utf8;
    use Encode;
    use HTML::Encoding 'encoding_from_http_message';
    use HTML::TokeParser;
    use HTML::TokeParser::Simple;

    my $url = "http://dict.cn/search/";

    my $mw = MainWindow->new;

    $mw->fontCreate('medium',
    -family=>'courier',
    -weight=>'bold',
    -size=>int(-14*14/10));

    my $scroll = $mw->Scrollbar;
    my $text = $mw->Text(-yscrollcommand => ['set', $scroll],
    -wrap => 'word',
    -font => 'medium',
    -bg => 'white',
    -state => 'disabled');

    $scroll->configure(-command => ['yview', $text]);
    $scroll->pack(-side => 'right', -expand => 'no', -fill => 'y');
    $text->pack(-side => 'left', -anchor => 'w',
    -expand => 'yes', -fill => 'both');

    &do_search;

    MainLoop;

    sub do_search {

    $text->configure(-cursor=> 'watch');
    $mw->idletasks;

    my $content = LWP::UserAgent->new->
    get($url, 'Accept-Charset'=>'UTF-8');
    my $enco = encoding_from_http_message($content);
    my $utf8 = decode($enco => $content->content());

    my $output;
    my $parser = HTML::TokeParser::Simple->new( \$utf8 );

    while ( my $token = $parser->get_token ) {
    # This prints all text in an HTML doc (i.e., it strips the HTML)
    next if ! $token->is_text;
    $output .= $token->as_is;
    }
    #remove obvious junk
    $output =~ s/\x{d}//g;

    ## Clear out text item
    $text->configure(-state => "normal");
    $text->delete('1.0', 'end');
    $text->insert('end', $output);
    # $text->insert('end', $utf8); #for the raw page
    $text->configure(-cursor => 'top_left_arrow');
    }
    __END__





    --
    I'm not really a human, but I play one on earth.
    http://zentara.net/japh.html
    zentara, Nov 26, 2006
    #2
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Mr. SweatyFinger

    why why why why why

    Mr. SweatyFinger, Nov 28, 2006, in forum: ASP .Net
    Replies:
    4
    Views:
    853
    Mark Rae
    Dec 21, 2006
  2. Mr. SweatyFinger
    Replies:
    2
    Views:
    1,739
    Smokey Grindel
    Dec 2, 2006
  3. Pete Mahoney
    Replies:
    1
    Views:
    214
    Ray at
    Dec 15, 2003
  4. Devin Jeanpierre
    Replies:
    2
    Views:
    427
    Devin Jeanpierre
    Feb 14, 2012
  5. Karen Wieprecht

    trying to "use Sys::Syslog" but I get nothing ...

    Karen Wieprecht, Jan 24, 2005, in forum: Perl Misc
    Replies:
    11
    Views:
    167
    Karen Wieprecht
    Jan 26, 2005
Loading...

Share This Page