|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/Lingua/EN/ |
Upload File : |
require 5.004; # Time-stamp: "2005-01-05 17:12:51 AST"
package Lingua::EN::Numbers;
require Exporter;
@ISA = qw(Exporter);
use strict;
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } # setup a DEBUG constant
use vars qw(
@EXPORT @EXPORT_OK $VERSION
$MODE $TRUE $FALSE
%D %Card2ord %Mult
);
$VERSION = '1.01';
@EXPORT = ();
@EXPORT_OK = qw( num2en num2en_ordinal );
#--------------------------------------------------------------------------
# Here begins the old junk, including mock-object stuff:
$TRUE = 1; $FALSE = 0; $MODE = "American";
sub import {
@_ = grep { $_ ne 'American' and $_ ne 'British' } @_;
goto &Exporter::import;
}
sub new {
my $class = shift;
my $obj = bless [undef], ref($class) || $class;
$obj->parse($_[0]);
return $obj;
}
sub parse { $_[0][0] = $_[1]; return 1; }
sub get_string { num2en( $_[0][0] ); }
# End of legacy stuff.
#--------------------------------------------------------------------------
@D{0 .. 20, 30,40,50,60,70,80,90} = qw|
zero
one two three four five six seven eight nine ten
eleven twelve thirteen fourteen fifteen
sixteen seventeen eighteen nineteen
twenty thirty forty fifty sixty seventy eighty ninety
|;
@Card2ord{ qw| one two three five eight nine twelve |}
= qw| first second third fifth eighth ninth twelfth |;
{
my $c = 0;
for ( '', qw<
thousand million billion trillion quadrillion quintillion sextillion
septillion octillion nonillion
> ) {
$Mult{$c} = $_;
$c++;
}
}
#==========================================================================
sub num2en_ordinal {
# Cardinals are [one two three...]
# Ordinals are [first second third...]
return undef unless defined $_[0] and length $_[0];
my($x) = $_[0];
$x = num2en($x);
return $x unless $x;
$x =~ s/(\w+)$//s or return $x . "th";
my $last = $1;
$last =
$Card2ord{$last} || ( $last =~ s/y$/ieth/ && $last ) || ( $last . "th" );
return "$x$last";
}
#==========================================================================
sub num2en {
my $x = $_[0];
return undef unless defined $x and length $x;
return 'not-a-number' if $x eq 'NaN';
return 'positive infinity' if $x =~ m/^\+inf(?:inity)?$/si;
return 'negative infinity' if $x =~ m/^\-inf(?:inity)?$/si;
return 'infinity' if $x =~ m/^inf(?:inity)?$/si;
return $D{$x} if exists $D{$x}; # the most common cases
# Make sure it's not in scientific notation:
{ my $e = _e2en($x); return $e if defined $e; }
my $orig = $x;
$x =~ s/,//g; # nix any commas
my $sign;
$sign = $1 if $x =~ s/^([-+])//s;
my($int, $fract);
if( $x =~ m<^\d+$> ) { $int = $x }
elsif( $x =~ m<^(\d+)\.(\d+)$> ) { $int = $1; $fract = $2 }
elsif( $x =~ m<^\.(\d+)$> ) { $fract = $1 }
else {
DEBUG and print "Not a number: \"orig\"\n";
return undef;
}
DEBUG and printf " Working on Sign[%s] Int2en[%s] Fract[%s] < \"%s\"\n",
map defined($_) ? $_ : "nil", $sign, $int, $fract, $orig;
return join ' ', grep defined($_) && length($_),
_sign2en($sign),
_int2en($int),
_fract2en($fract),
;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub _sign2en {
return undef unless defined $_[0] and length $_[0];
return 'negative' if $_[0] eq '-';
return 'positive' if $_[0] eq '+';
return "WHAT_IS_$_[0]";
}
sub _fract2en { # "1234" => "point one two three four"
return undef unless defined $_[0] and length $_[0];
my $x = $_[0];
return join ' ', 'point', map $D{$_}, split '', $x;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The real work:
sub _int2en {
return undef unless defined $_[0] and length $_[0]
and $_[0] =~ m/^\d+$/s;
my($x) = $_[0];
return $D{$x} if defined $D{$x}; # most common/irreg cases
if( $x =~ m/^(.)(.)$/ ) {
return $D{$1 . '0'} . '-' . $D{$2};
# like forty - two
# note that neither bit can be zero at this point
} elsif( $x =~ m/^(.)(..)$/ ) {
my($h, $rest) = ("$D{$1} hundred", $2);
return $h if $rest eq '00';
return "$h and " . _int2en(0 + $rest);
} else {
return _bigint2en($x);
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sub _bigint2en {
return undef unless defined $_[0] and length $_[0]
and $_[0] =~ m/^\d+$/s;
my($x) = $_[0];
my @chunks; # each: [ string, exponent ]
{
my $groupnum = 0;
my $num;
while( $x =~ s<(\d{1,3})$><>s ) { # pull at most three digits from the end
$num = $1 + 0;
unshift @chunks, [ $num, $groupnum ] if $num;
++$groupnum;
}
return $D{'0'} unless @chunks; # rare but possible
}
my $and;
$and = 'and' if $chunks[-1][1] == 0 and $chunks[-1][0] < 100;
# The special 'and' that shows up in like "one thousand and eight"
# and "two billion and fifteen", but not "one thousand [*and] five hundred"
# or "one million, [*and] nine"
_chunks2en( \@chunks );
$chunks[-2] .= " and" if $and and @chunks > 1;
return "$chunks[0] $chunks[1]" if @chunks == 2;
# Avoid having a comma if just two units
return join ", ", @chunks;
}
sub _chunks2en {
my $chunks = $_[0];
return unless @$chunks;
my @out;
foreach my $c (@$chunks) {
push @out, $c = _groupify( _int2en( $c->[0] ), $c->[1] ) if $c->[0];
}
@$chunks = @out;
return;
}
sub _groupify {
# turn ("seventeen", 3) => "seventeen billion"
my($basic, $multnum) = @_;
return $basic unless $multnum; # the first group is unitless
DEBUG > 2 and print " Groupifying $basic x $multnum mults\n";
return "$basic $Mult{$multnum}" if $Mult{$multnum};
# Otherwise it must be huuuuuge, so fake it with scientific notation
return "$basic " . "times ten to the " . num2en_ordinal($basic * 3);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
# Because I can never remember this:
#
# 3.1E8
# ^^^ is called the "mantissa"
# ^ is called the "exponent"
# (the implicit "10" is the "base" a/k/a "radix")
sub _e2en {
my $x = $_[0];
my($m, $e);
if( $x =~
m<
^(
[-+]? # leading sign
(?:
[\d,]+ | [\d,]*\.\d+ # number
)
)
[eE]
(-?\d+) # mantissa, has to be an integer
$
>x
) {
($m, $e) = ($1, $2);
DEBUG and print " Scientific notation: [$x] => $m E $e\n";
$e += 0;
return num2en($m) . ' times ten to the ' . num2en_ordinal($e);
} else {
DEBUG and print " Okay, $x isn't in exponential notation\n";
return undef;
}
}
#==========================================================================
1;
__END__
=head1 NAME
Lingua::EN::Numbers - turn "407" into "four hundred and seven", etc.
=head1 SYNOPSIS
use Lingua::EN::Numbers qw(num2en num2en_ordinal);
my $x = 234;
my $y = 54;
print "You have ", num2en($x), " things to do today!\n";
print "You will stop caring after the ", num2en_ordinal($y), ".\n";
prints:
You have two hundred and thirty-four things to do today!
You will stop caring after the fifty-fourth.
=head1 DESCRIPTION
Lingua::EN::Numbers turns numbers into English text. It exports
(upon request) two functions,
C<num2en> and C<num2en_ordinal>. Each takes a scalar value and returns
a scalar value. The return value is the English text expressing that
number; or if what you provided wasn't a number, then they return undef.
This module can handle integers like "12" or "-3" and real numbers like "53.19".
This module also understands exponential notation -- it turns "4E9" into
"four times ten to the ninth"). And it even turns "INF", "-INF", "NaN"
into "infinity", "negative infinity", and "not a number", respectively.
Any commas in the input numbers are ignored.
=head1 LEGACY INTERFACE
For some amount of backward compatability with the old (before 1.01)
version of this module, the old OO interface is supported, where you can
construct a number object with C<new(I<[optionalvalue]>)>, change its
value with C<parse(I<value>)>, and get its Engish expression with
C<get_string()>.
The rest of the old version's interface (like
$Lingua::EN::Numbers::MODE and the other $Lingua::EN::Numbers::I<ALLCAPS>
variables) are no longer supported. This is not a great loss, as much
of that stuff never worked right in the first place.
=head1 SEE ALSO
L<Lingua::EN::WordsToNumbers>
L<Lingua::EN::Numbers::Ordinate>
L<Lingua::EN::Numbers::Years>
=head1 COPYRIGHT
Copyright (c) 2005, Sean M. Burke, author of the later versions.
This library is free software; you can redistribute it and/or modify
it only under the terms of version 2 of the GNU General Public License
(L<perlgpl>).
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
(But if you have any problems with this library, I ask that you let
me know.)
=head1 AUTHOR
Sean M. Burke, [email protected]
(The author of the 0.01 version was Stephen Pandich.)
=cut