Unicode statistics (uses Data::Alias)

D

Dr.Ruud

#!/usr/bin/perl
# Script-Id: unicount.pl.0990@ruud
# Subject: show Unicode statistics

use strict ;
use warnings ;

use Data::Alias ;


binmode STDOUT, ':utf8' ;


my @table =
# +--Name------+---qRegexp--------+-C-+-L-+-U-+
(
[ 'xdigit' , qr/[[:xdigit:]]/ , 0 , 0 , 0 ] ,
[ 'ascii' , qr/[[:ascii:]]/ , 0 , 0 , 0 ] ,
[ '\\d' , qr/\d/ , 0 , 0 , 0 ] ,
[ 'digit' , qr/[[:digit:]]/ , 0 , 0 , 0 ] ,
[ 'IsNumber' , qr/\p{IsNumber}/ , 0 , 0 , 0 ] ,
[ 'alpha' , qr/[[:alpha:]]/ , 0 , 0 , 0 ] ,
[ 'alnum' , qr/[[:alnum:]]/ , 0 , 0 , 0 ] ,
[ 'word' , qr/[[:word:]]/ , 0 , 0 , 0 ] ,
[ 'graph' , qr/[[:graph:]]/ , 0 , 0 , 0 ] ,
[ 'print' , qr/[[:print:]]/ , 0 , 0 , 0 ] ,
[ 'blank' , qr/[[:blank:]]/ , 0 , 0 , 0 ] ,
[ 'space' , qr/[[:space:]]/ , 0 , 0 , 0 ] ,
[ 'punct' , qr/[[:punct:]]/ , 0 , 0 , 0 ] ,
[ 'cntrl' , qr/[[:cntrl:]]/ , 0 , 0 , 0 ] ,
) ;


my @codepoints =
(
0x0000 .. 0xD7FF,
0xE000 .. 0xFDCF,
0xFDF0 .. 0xFFFD,
0x10000 .. 0x1FFFD,
0x20000 .. 0x2FFFD, # etc.
) ;


for my $row ( @table )
{
alias my ($name, $qrx, $count, $lower, $upper) = @$row ;

printf "\n%s\n", $name ;

my $n = 0 ;

for ( @codepoints )
{
local $_ = chr ; # int-2-char conversion
$n++ ;

if ( /$qrx/ )
{
$count++ ;
$lower++ if /[[:lower:]]/ ;
$upper++ if /[[:upper:]]/ ;
}
}

my $show_lower_upper =
($lower || $upper)
? sprintf( " (lower:%6d, upper:%6d)"
, $lower
, $upper
)
: '' ;

printf "%6d /%6d =%7.3f%%%s\n"
, $count
, $n
, 100 * $count / $n
, $show_lower_upper
}

print "\n" ;

__END__
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,744
Messages
2,569,482
Members
44,901
Latest member
Noble71S45

Latest Threads

Top