D
Dr.Ruud
#!/usr/bin/perl
# Script-Id: unicount.pl.0990@ruud
# Subject: show Unicode statistics
use strict ;
use warnings ;
use Data::Alias ;
binmode STDOUT, ':utf8' ;
my @table =
# +--Name------+---qRegexp--------+-C-+-L-+-U-+
(
[ 'xdigit' , qr/[[:xdigit:]]/ , 0 , 0 , 0 ] ,
[ 'ascii' , qr/[[:ascii:]]/ , 0 , 0 , 0 ] ,
[ '\\d' , qr/\d/ , 0 , 0 , 0 ] ,
[ 'digit' , qr/[[:digit:]]/ , 0 , 0 , 0 ] ,
[ 'IsNumber' , qr/\p{IsNumber}/ , 0 , 0 , 0 ] ,
[ 'alpha' , qr/[[:alpha:]]/ , 0 , 0 , 0 ] ,
[ 'alnum' , qr/[[:alnum:]]/ , 0 , 0 , 0 ] ,
[ 'word' , qr/[[:word:]]/ , 0 , 0 , 0 ] ,
[ 'graph' , qr/[[:graph:]]/ , 0 , 0 , 0 ] ,
[ 'print' , qr/[[
rint:]]/ , 0 , 0 , 0 ] ,
[ 'blank' , qr/[[:blank:]]/ , 0 , 0 , 0 ] ,
[ 'space' , qr/[[:space:]]/ , 0 , 0 , 0 ] ,
[ 'punct' , qr/[[
unct:]]/ , 0 , 0 , 0 ] ,
[ 'cntrl' , qr/[[:cntrl:]]/ , 0 , 0 , 0 ] ,
) ;
my @codepoints =
(
0x0000 .. 0xD7FF,
0xE000 .. 0xFDCF,
0xFDF0 .. 0xFFFD,
0x10000 .. 0x1FFFD,
0x20000 .. 0x2FFFD, # etc.
) ;
for my $row ( @table )
{
alias my ($name, $qrx, $count, $lower, $upper) = @$row ;
printf "\n%s\n", $name ;
my $n = 0 ;
for ( @codepoints )
{
local $_ = chr ; # int-2-char conversion
$n++ ;
if ( /$qrx/ )
{
$count++ ;
$lower++ if /[[:lower:]]/ ;
$upper++ if /[[:upper:]]/ ;
}
}
my $show_lower_upper =
($lower || $upper)
? sprintf( " (lower:%6d, upper:%6d)"
, $lower
, $upper
)
: '' ;
printf "%6d /%6d =%7.3f%%%s\n"
, $count
, $n
, 100 * $count / $n
, $show_lower_upper
}
print "\n" ;
__END__
# Script-Id: unicount.pl.0990@ruud
# Subject: show Unicode statistics
use strict ;
use warnings ;
use Data::Alias ;
binmode STDOUT, ':utf8' ;
my @table =
# +--Name------+---qRegexp--------+-C-+-L-+-U-+
(
[ 'xdigit' , qr/[[:xdigit:]]/ , 0 , 0 , 0 ] ,
[ 'ascii' , qr/[[:ascii:]]/ , 0 , 0 , 0 ] ,
[ '\\d' , qr/\d/ , 0 , 0 , 0 ] ,
[ 'digit' , qr/[[:digit:]]/ , 0 , 0 , 0 ] ,
[ 'IsNumber' , qr/\p{IsNumber}/ , 0 , 0 , 0 ] ,
[ 'alpha' , qr/[[:alpha:]]/ , 0 , 0 , 0 ] ,
[ 'alnum' , qr/[[:alnum:]]/ , 0 , 0 , 0 ] ,
[ 'word' , qr/[[:word:]]/ , 0 , 0 , 0 ] ,
[ 'graph' , qr/[[:graph:]]/ , 0 , 0 , 0 ] ,
[ 'print' , qr/[[
[ 'blank' , qr/[[:blank:]]/ , 0 , 0 , 0 ] ,
[ 'space' , qr/[[:space:]]/ , 0 , 0 , 0 ] ,
[ 'punct' , qr/[[
[ 'cntrl' , qr/[[:cntrl:]]/ , 0 , 0 , 0 ] ,
) ;
my @codepoints =
(
0x0000 .. 0xD7FF,
0xE000 .. 0xFDCF,
0xFDF0 .. 0xFFFD,
0x10000 .. 0x1FFFD,
0x20000 .. 0x2FFFD, # etc.
) ;
for my $row ( @table )
{
alias my ($name, $qrx, $count, $lower, $upper) = @$row ;
printf "\n%s\n", $name ;
my $n = 0 ;
for ( @codepoints )
{
local $_ = chr ; # int-2-char conversion
$n++ ;
if ( /$qrx/ )
{
$count++ ;
$lower++ if /[[:lower:]]/ ;
$upper++ if /[[:upper:]]/ ;
}
}
my $show_lower_upper =
($lower || $upper)
? sprintf( " (lower:%6d, upper:%6d)"
, $lower
, $upper
)
: '' ;
printf "%6d /%6d =%7.3f%%%s\n"
, $count
, $n
, 100 * $count / $n
, $show_lower_upper
}
print "\n" ;
__END__