Unicode statistics (uses Data::Alias)

Discussion in 'Perl Misc' started by Dr.Ruud, Jun 7, 2006.

  1. Dr.Ruud

    Dr.Ruud Guest

    #!/usr/bin/perl
    # Script-Id: unicount.pl.0990@ruud
    # Subject: show Unicode statistics

    use strict ;
    use warnings ;

    use Data::Alias ;


    # Push the :utf8 I/O layer onto STDOUT so that anything printed below
    # is written out as UTF-8.
    binmode STDOUT, ':utf8' ;


    # Character classes to tally, one row per class.
    #
    # Each row is [ name, compiled regexp, count, lower, upper ].  Only the
    # name and the regexp are spelled out here; the three counters are
    # appended as zeros and are incremented later by the reporting loop.
    my @table = map { [ @$_, 0, 0, 0 ] }
    (
        [ 'xdigit'   => qr/[[:xdigit:]]/ ],
        [ 'ascii'    => qr/[[:ascii:]]/  ],
        [ '\\d'      => qr/\d/           ],
        [ 'digit'    => qr/[[:digit:]]/  ],
        [ 'IsNumber' => qr/\p{IsNumber}/ ],
        [ 'alpha'    => qr/[[:alpha:]]/  ],
        [ 'alnum'    => qr/[[:alnum:]]/  ],
        [ 'word'     => qr/[[:word:]]/   ],
        [ 'graph'    => qr/[[:graph:]]/  ],
        [ 'print'    => qr/[[:print:]]/  ],
        [ 'blank'    => qr/[[:blank:]]/  ],
        [ 'space'    => qr/[[:space:]]/  ],
        [ 'punct'    => qr/[[:punct:]]/  ],
        [ 'cntrl'    => qr/[[:cntrl:]]/  ],
    ) ;


    # Code points to classify, expanded from inclusive [ first, last ] pairs
    # and kept in ascending order.
    #
    # The gaps between the pairs are U+D800..U+DFFF, U+FDD0..U+FDEF and the
    # last two code points of each plane (the Unicode surrogate and
    # noncharacter ranges).  Only planes 0 to 2 are listed; further planes
    # can be appended in the same way.
    my @codepoints = map { $_->[0] .. $_->[1] }
    (
        [ 0x0000,  0xD7FF  ],
        [ 0xE000,  0xFDCF  ],
        [ 0xFDF0,  0xFFFD  ],
        [ 0x10000, 0x1FFFD ],
        [ 0x20000, 0x2FFFD ], # etc.
    ) ;


    # For every character class in @table, count how many of the sampled
    # code points match it, and how many of those matches are also lower-
    # or upper-case, then print one summary per class:
    #
    #     <name>
    #     <count> / <total> = <percent>% (lower: <n>, upper: <n>)
    #
    # The lower/upper part is printed only when at least one of the two
    # counters is non-zero.
    for my $row ( @table )
    {
        # Alias (rather than copy) the row fields, so that the counters
        # incremented below are stored back into the @table row.
        alias my ($name, $qrx, $count, $lower, $upper) = @$row ;

        printf "\n%s\n", $name ;

        for my $codepoint ( @codepoints )
        {
            my $char = chr $codepoint ; # int-2-char conversion

            # chr() yields a non-UTF-8 string for 0x80..0xFF.  Without the
            # upgrade, the POSIX-class patterns would be matched with
            # native-byte rules for those characters (so e.g. U+00E9 would
            # not count as alpha or lower), while \p{IsNumber} always uses
            # Unicode rules.  Upgrading makes every row use Unicode rules.
            utf8::upgrade( $char ) ;

            next unless $char =~ $qrx ;

            $count++ ;
            $lower++ if $char =~ /[[:lower:]]/ ;
            $upper++ if $char =~ /[[:upper:]]/ ;
        }

        # Every code point is tested exactly once per row, so the
        # denominator is simply the size of the sample.
        my $n = @codepoints ;

        # Guard the division so an empty sample reports 0% instead of dying.
        my $percent = $n ? 100 * $count / $n : 0 ;

        my $show_lower_upper =
            ($lower || $upper)
            ? sprintf( " (lower:%6d, upper:%6d)", $lower, $upper )
            : '' ;

        printf "%6d /%6d =%7.3f%%%s\n"
            , $count
            , $n
            , $percent
            , $show_lower_upper ;
    }

    print "\n" ;

    __END__

    --
    Affijn, Ruud

    "Gewoon is een tijger."
     
    Dr.Ruud, Jun 7, 2006
    #1
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Sean McIlroy

    alias for data member of class instance?

    Sean McIlroy, Feb 5, 2007, in forum: Python
    Replies:
    4
    Views:
    464
    Bruno Desthuilliers
    Feb 5, 2007
  2. Aahz
    Replies:
    1
    Views:
    309
    Jenx880
    Aug 24, 2007
  3. Replies:
    0
    Views:
    400
  4. Hendrik van Rooyen
    Replies:
    3
    Views:
    310
    Bjoern Schliessmann
    Mar 30, 2008
  5. grocery_stocker
    Replies:
    9
    Views:
    804
    grocery_stocker
    May 24, 2008
Loading...

Share This Page