|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/cgi-bin/ |
Upload File : |
# -*- coding: utf-8; -*-
package ChineseNumbers;
require Exporter;
use strict;
use Lingua::EN::Numbers qw(num2en num2en_ordinal);
use subs qw{EnglishToChineseNumber ChineseToEnglishNumber chinese_output english_output};
# Author: Erik Peterson
# E-mail: [email protected]
# Source: http://www.mandarintools.com/numbers.html
#
# Usage:
#
# use ChineseNumbers;
#
# ChineseNumbers->EnglishToChineseNumber(enumber, [output_type])
# enumber is an integer
# output_type (which is optional) can be
# trad : Output with traditional Chinese characters
# formaltrad : Output as formal numbers with traditional characters
# simp : Output using simplified Chinese characters
# formalsimp : Output as formal numbers in simplified characters
# unicodehex : Output as 4-digit Unicode hex blocks (documented, but not implemented in this file; currently produces the same output as trad)
# pinyin : Output as Hanyu Pinyin
# jyutpin : Output as Cantonese jyutpin romanization
# yalecant : Output as Cantonese Yale romanization
# The default is trad
#
# ChineseNumbers->ChineseToEnglishNumber(cnumber, [english_type])
# cnumber is a string in UTF-8
# english_type is
# arabic : plain Arabic numerals
# comma : plain Arabic numbers with commas
# words : written out using English words
#
# ChineseNumbers->chinese_output([option])
# Set the default output type used by EnglishToChineseNumber
# option can be any of the output options for EnglishToChineseNumber
# If no arguments, returns the current default
#
# ChineseNumbers->english_output([option])
# Set the default output type used by ChineseToEnglishNumber
# option can be any of the output options for ChineseToEnglishNumber
# If no arguments, returns the current default
#
# Empty BEGIN block: no compile-time setup is performed.
BEGIN { }
# Module-wide defaults.  They are read when a conversion is called without an
# explicit type and can be changed through chinese_output() / english_output().
my $default_outputtype = "trad";
my $default_englishtype = "arabic";
# Sign and decimal-point markers, in traditional characters.
my $MINUS = "負";
my $DECIMAL = "點";
# Everyday traditional digit characters, indexed by digit value (0-9).
my @digits = ("零", "一", "二", "三", "四", "五", "六", "七", "八", "九");
# Every digit spelling accepted when parsing, mapped to its numeric value:
# ASCII digits, everyday characters, formal characters (traditional and
# simplified) and, for 2, the alternate forms 兩 / 两.
# NOTE(review): each row lists the ASCII digit twice.  The repeated key is
# harmless (same value); possibly the second entry was meant to be a
# different (e.g. full-width) form -- confirm against the upstream source.
my %digits = ("0", 0, "0", 0, "零", 0, "〇", 0,
"1", 1, "1", 1, "一", 1, "壹", 1,
"2", 2, "2", 2, "二", 2, "貳", 2, "贰", 2, "兩", 2, "两", 2,
"3", 3, "3", 3, "三", 3, "參", 3, "叄", 3, "叁", 3,
"4", 4, "4", 4, "四", 4, "肆", 4,
"5", 5, "5", 5, "五", 5, "伍", 5,
"6", 6, "6", 6, "六", 6, "陸", 6, "陆", 6,
"7", 7, "7", 7, "七", 7, "柒", 7,
"8", 8, "8", 8, "八", 8, "捌", 8,
"9", 9, "9", 9, "九", 9, "玖", 9);
# Place markers inside one four-digit group: tens, hundreds, thousands.
my @beforeWan = ("十", "百", "千");
# Numeric value of each place marker (everyday and formal forms).
my %beforeWan = ("十", 10, "拾", 10,
"百", 100, "佰", 100,
"千", 1000, "仟", 1000);
# Markers for successive four-digit groups, indexed by group number
# (power of ten divided by four).  Group 0 (the units group) has no marker.
my @afterWan = ("", "萬", "億", "兆", "京");
# Numeric value of each group marker (traditional and simplified forms).
my %afterWan = ("萬", 10000, "万", 10000,
"億", 100000000, "亿", 100000000,
"兆", 1000000000000,
"京", 10000000000000000);
# Alternate character for the digit two.
my $ALTTWO = "兩";
# Base of the number system.
my $TEN = 10;
# Traditional character -> simplified character, one entry per character the
# converter can emit.  Characters that are the same in both scripts map to
# themselves.  The "點" key is listed twice with the same value; the
# repetition has no effect.
# NOTE(review): 京 (the last marker in @afterWan) has no entry in this table.
my %trad2simp = ("負" => "负",
"點" => "点",
"零" => "零",
"一" => "一",
"二" => "二",
"三" => "三",
"四" => "四",
"五" => "五",
"六" => "六",
"七" => "七",
"八" => "八",
"九" => "九",
"十" => "十",
"百" => "百",
"千" => "千",
"萬" => "万",
"億" => "亿",
"兆" => "兆",
"兩" => "两",
"點" => "点");
# Traditional character -> formal traditional character.  Only the digits
# 一-九 and the place markers 十/百/千 change; everything else maps to itself.
# The "點" key is listed twice with the same value.
# NOTE(review): 京 has no entry in this table.
my %trad2formal = ("負" => "負",
"點" => "點",
"零" => "零",
"一" => "壹",
"二" => "貳",
"三" => "參",
"四" => "肆",
"五" => "伍",
"六" => "陸",
"七" => "柒",
"八" => "捌",
"九" => "玖",
"十" => "拾",
"百" => "佰",
"千" => "仟",
"萬" => "萬",
"億" => "億",
"兆" => "兆",
"兩" => "兩",
"點" => "點");
# Traditional character -> formal simplified character: formal digit and
# place-marker forms combined with the simplified sign, decimal point and
# group markers.
# NOTE(review): 京 has no entry in this table.
my %trad2formalsimp = ("負" => "负",
"點" => "点",
"零" => "零",
"一" => "壹",
"二" => "贰",
"三" => "叁",
"四" => "肆",
"五" => "伍",
"六" => "陆",
"七" => "柒",
"八" => "捌",
"九" => "玖",
"十" => "拾",
"百" => "佰",
"千" => "仟",
"萬" => "万",
"億" => "亿",
"兆" => "兆",
"兩" => "两");
# Traditional character -> romanized syllable for the "pinyin" output type.
# Each value is a syllable followed by a digit (presumably the tone number).
# NOTE(review): 京 has no entry in this table.
my %trad2pinyin = ("負" => "fu4",
"點" => "dian3",
"零" => "ling2",
"一" => "yi1",
"二" => "er4",
"三" => "san1",
"四" => "si4",
"五" => "wu3",
"六" => "liu4",
"七" => "qi1",
"八" => "ba1",
"九" => "jiu3",
"十" => "shi2",
"百" => "bai3",
"千" => "qian1",
"萬" => "wan4",
"億" => "yi4",
"兆" => "zhao4",
"兩" => "liang3");
# Traditional character -> romanized syllable for the "yalecant" output type.
# NOTE(review): unlike the other romanization tables, several values here
# carry no trailing digit ("fu", "yat", "luhk", "maahn", "siu") -- confirm
# whether that is intentional.  京 has no entry in this table.
my %trad2yalecant = ("負" => "fu",
"點" => "dim2",
"零" => "ling2",
"一" => "yat",
"二" => "yih7",
"三" => "saam1",
"四" => "sei5",
"五" => "ng4",
"六" => "luhk",
"七" => "chat1",
"八" => "baat1",
"九" => "gao3",
"十" => "sap7",
"百" => "baak5",
"千" => "chin1",
"萬" => "maahn",
"億" => "yik1",
"兆" => "siu",
"兩" => "leung4");
# Traditional character -> romanized syllable for the "jyutpin" output type.
# Each value is a syllable followed by a digit (presumably the tone number).
# NOTE(review): 京 has no entry in this table.
my %trad2jyutpin = ("負" => "fu6",
"點" => "dim4",
"零" => "ling4",
"一" => "jat1",
"二" => "ji6",
"三" => "saam1",
"四" => "sei3",
"五" => "ng5",
"六" => "luk6",
"七" => "cat1",
"八" => "baat3",
"九" => "gau2",
"十" => "sap6",
"百" => "baak3",
"千" => "cin1",
"萬" => "maan6",
"億" => "jik1",
"兆" => "siu6",
"兩" => "loeng5");
# Constructor.  Returns a new, empty hash-based object.
#
# The object is blessed into the class the constructor was invoked on, so
# subclasses get instances of their own class.  When invoked on an existing
# object the new object takes that object's class; when called as a plain
# function with no argument it falls back to this package.
sub new {
    my($class) = @_;
    $class = ref($class) || $class || __PACKAGE__;
    return bless {}, $class;
}
# The heart of the program: converts a number written with Arabic numerals
# into Chinese.
#
# Arguments (after the invocant):
#   enumber    - the number to convert.  Everything except digits, "." and
#                "-" is stripped first (so "1,234" works); the leading
#                [-]digits[.digits] part of what remains is converted.
#   outputtype - optional output style: trad, simp, formaltrad, formalsimp,
#                pinyin, jyutpin or yalecant (case-insensitive).  Defaults to
#                the value set through chinese_output().  Any other value --
#                including "unicodehex", which has no implementation here --
#                yields traditional characters.
#
# Returns the converted string.  The romanized styles put one space after
# every syllable.  A zero value is rendered as a plain zero without sign or
# decimals.  If the integer part has more digits than @afterWan has group
# markers for, a message is printed to STDERR and "" is returned.
#
# The number is handled as a string of digits rather than arithmetically, so
# long integers and trailing decimal zeros are not distorted by floating
# point or integer conversion.
sub EnglishToChineseNumber {
    my($self) = shift;
    my($enumber) = shift;
    my($outputtype) = shift;
    if (!defined($outputtype) || $outputtype eq "") {
        $outputtype = $default_outputtype;
    }
    $outputtype = lc($outputtype);

    my($inzero) = 0;     # are we in a stretch of one or more zeros (only one 零 is added per stretch)
    my($canaddzero) = 0; # only add a zero if there's something non-zero on both sides of it
    my($cnumber) = "";   # the result, built in traditional characters

    # Remove everything except digits, the decimal point and the minus sign
    $enumber = "" unless defined($enumber);
    $enumber =~ s/[^0-9\.-]//g;

    # Split into sign, integer digits and decimal digits.  Every part of the
    # pattern is optional, so the match always succeeds.
    my($sign, $intpart, $remainder) =
        ($enumber =~ m/\A(-?)([0-9]*)(?:\.([0-9]*))?/);
    $remainder = "" unless defined($remainder);
    $intpart =~ s/\A0+//;
    my($negative) = ($sign eq "-") ? 1 : 0;

    # A value of zero is written as a bare zero: no sign, no decimals
    if ($intpart eq "" && $remainder !~ m/[1-9]/) {
        $negative = 0;
        $remainder = "";
    }

    # $powers[$k] is the coefficient of 10**$k; $power is the number of digits
    my(@powers) = reverse(split(//, $intpart));
    my($power) = scalar(@powers);
    if ($power > 4 * scalar(@afterWan)) {
        print STDERR "Number is too large to convert. $enumber\n";
        return "";
    }

    # An empty integer part (0, 0.5, .5) is still written with a leading zero
    if ($power == 0) {
        $cnumber = $digits[0];
    }

    # Generate the Chinese equivalent, working from the lowest power upwards
    # and prepending to the result
    my($i);
    for ($i = 0; $i < $power; $i++) {
        if (($i % 4) == 0) { # Reached the next four powers up level
            if ($powers[$i] != 0) {
                $inzero = 0;
                $canaddzero = 1;
                $cnumber = $digits[$powers[$i]] . $afterWan[$i/4] . $cnumber;
            } else {
                # Check that something in the next three powers is non-zero
                # before adding the group marker
                if ((($i+3 < $power) && $powers[$i+3] != 0) ||
                    (($i+2 < $power) && $powers[$i+2] != 0) ||
                    (($i+1 < $power) && $powers[$i+1] != 0))
                {
                    $cnumber = $afterWan[$i/4] . $cnumber;
                    $canaddzero = 0;
                }
            }
        } else { # Add tens, hundreds, or thousands place for each level
            if ($powers[$i] != 0) {
                $inzero = 0;
                $canaddzero = 1;
                if ($power == 2 && $i == 1 && $powers[$i] == 1) { # No 一 with 10 through 19
                    $cnumber = $beforeWan[($i % 4)-1] . $cnumber;
                } else {
                    $cnumber = $digits[$powers[$i]] . $beforeWan[($i % 4)-1] . $cnumber;
                }
            } else {
                if ($canaddzero == 1 && $inzero == 0) { # Only insert one 零 for all consecutive zeroes
                    $inzero = 1;
                    $cnumber = $digits[$powers[$i]] . $cnumber;
                }
            }
        }
    }

    # Decimal digits are read out one by one after the decimal marker
    if ($remainder ne "") {
        $cnumber .= $DECIMAL;
        foreach my $digit (split(//, $remainder)) {
            $cnumber .= $digits[$digit];
        }
    }

    # Add the negative character
    if ($negative == 1) {
        $cnumber = $MINUS . $cnumber;
    }

    # Conversion table and per-character separator for each output style
    my(%style) = (
        "simp"       => [\%trad2simp,       ""],
        "formaltrad" => [\%trad2formal,     ""],
        "formalsimp" => [\%trad2formalsimp, ""],
        "pinyin"     => [\%trad2pinyin,     " "],
        "jyutpin"    => [\%trad2jyutpin,    " "],
        "yalecant"   => [\%trad2yalecant,   " "],
    );

    # "trad" and every unrecognised type return the traditional string as is
    if (!exists($style{$outputtype})) {
        return $cnumber;
    }

    my($table, $separator) = @{$style{$outputtype}};
    my($result) = "";
    my($count) = lengthu8($cnumber);
    my($j);
    for ($j = 0; $j < $count; $j++) {
        my $cchar = substru8($cnumber, $j, 1);
        # A character without a table entry is passed through unchanged
        # instead of silently vanishing from the output
        $result .= (exists($table->{$cchar}) ? $table->{$cchar} : $cchar) . $separator;
    }
    return $result;
}
# Convert a Chinese number (a UTF-8 byte string) into an English/Arabic one.
#
# Arguments (after the invocant):
#   cnumber    - the Chinese number.  A leading 第 marks an ordinal; a string
#                containing 分之 is read as a fraction (denominator before
#                分之, numerator after it).
#   outputtype - optional: "arabic" (plain number), "comma" (thousands
#                separators in the integer part) or "words" (English words
#                via Lingua::EN::Numbers); case-insensitive.  Defaults to the
#                value set through english_output().  Any other value is
#                treated like "arabic".
#
# Returns the converted value.  For ordinals the arabic and comma styles get
# an English suffix (1st, 2nd, 3rd, 11th, ...).  When the input cannot be
# parsed, or a fraction has a zero denominator, the arabic and comma styles
# return "" (a message is printed to STDERR).
sub ChineseToEnglishNumber {
    my($self) = shift;
    my($cnumber) = shift;
    my($outputtype) = shift;
    if (!defined($outputtype) || $outputtype eq "") {
        $outputtype = $default_englishtype;
    }
    $outputtype = lc($outputtype);
    $cnumber = "" unless defined($cnumber);

    my($ordinal) = ($cnumber =~ m/^第/) ? 1 : 0;
    my($result);

    if ($cnumber =~ m/分之/) {
        my($denom) = ($cnumber =~ m/^(.+?)分之/);
        my($numer) = ($cnumber =~ m/分之(.+)$/);
        my($numerval) = ChineseToEnglishFull($numer);
        my($denomval) = ChineseToEnglishFull($denom);
        if ($numerval eq "" || $denomval eq "") {
            # One side failed to parse; the parser has already reported it
            $result = "";
        } elsif ($denomval == 0) {
            print STDERR "Fraction has a zero denominator. $cnumber\n";
            $result = "";
        } else {
            $result = $numerval / $denomval;
        }
    } elsif (lengthu8($cnumber) > 1) {
        # A string made only of digit characters is read positionally
        # (e.g. 二〇二四); anything else goes through the full parser
        my($alldigits) = 1;
        my($count) = lengthu8($cnumber);
        my($i);
        for ($i = 0; $i < $count; $i++) {
            if (!defined($digits{substru8($cnumber, $i, 1)})) {
                $alldigits = 0;
                last;
            }
        }
        if ($alldigits == 1) {
            $result = ChineseToEnglishBrief($cnumber);
        } else {
            $result = ChineseToEnglishFull($cnumber);
        }
    } else {
        $result = ChineseToEnglishFull($cnumber);
    }

    if ($outputtype eq "words") {
        if ($ordinal) {
            return num2en_ordinal($result);
        } else {
            return num2en($result);
        }
    }

    if ($outputtype eq "comma") {
        my($withcomma) = "" . $result;
        # Repeatedly split the last three digits off the leading run of
        # digits.  Anchoring at the start (after an optional minus sign)
        # keeps the sign and any decimal part out of the grouping.
        1 while $withcomma =~ s/^(-?[0-9]+)([0-9]{3})/$1,$2/;
        if ($ordinal && $withcomma ne "") {
            $withcomma .= _ordinal_suffix($withcomma);
        }
        return $withcomma;
    }

    # "arabic" and any unrecognised type
    if ($ordinal && "$result" ne "") {
        $result .= _ordinal_suffix($result);
    }
    return $result;
}

# Return the English ordinal suffix ("st", "nd", "rd" or "th") for a number
# given as a string of digits, possibly containing separators.
sub _ordinal_suffix {
    my($number) = @_;
    return "th" if $number =~ m/1[0-9]\z/; # 10-19, 110-119, ...: always "th"
    return "st" if $number =~ m/1\z/;
    return "nd" if $number =~ m/2\z/;
    return "rd" if $number =~ m/3\z/;
    return "th";
}
# Read a string that consists only of digit characters as a positional
# decimal number: each character contributes one digit, most significant
# first.  Takes the UTF-8 string and returns the numeric value.
sub ChineseToEnglishBrief {
    my($cnumber) = @_;
    my $charcount = lengthu8($cnumber);
    my $value = 0;
    my $index = 0;
    while ($index < $charcount) {
        # Shift what we have one decimal place left, then add the next digit
        $value = $value * 10 + $digits{substru8($cnumber, $index, 1)};
        $index++;
    }
    return $value;
}
# Parse a Chinese number written with place and group markers
# (e.g. 三萬五千零二) and return its numeric value.
#
# Argument: the number as a UTF-8 byte string.  Recognised besides digits:
#   - a leading 負 / 负 / - (negative) or 第 (ordinal marker, skipped here)
#   - 點 / 点 / . followed by decimal digits
#   - place markers 十 百 千 and their formal forms
#   - group markers 萬/万, 億/亿, 兆 and 京
#   - the shorthands 廿 卄 卅 卌, the measure word 個/个, and 万亿 / 亿万
#     (either script), which is read as 兆
#   - a final digit directly after a marker, counted in the next lower place
#     (三萬五 is 35000)
#
# Returns the value, negated when a leading minus was seen.  If a character
# is not recognised, a message is printed to STDERR and "" is returned.
sub ChineseToEnglishFull {
    my($cnumber) = shift;
    my($negative) = 0;
    my($afterdecimal) = 0;
    my($power) = 0;      # after the decimal marker: power of ten of the next digit
    my($leveltotal) = 0; # value collected since the last group marker
    my($total) = 0;      # value of all completed groups
    my($cnumlength, $i, $cchar, $digitval, $nextcchar);

    # Power of ten of each group marker, and value of each place marker
    my(%grouppower) = ("京" => 16, "兆" => 12,
                       "億" => 8, "亿" => 8,
                       "萬" => 4, "万" => 4);
    my(%placevalue) = ("千" => 1000, "仟" => 1000,
                       "百" => 100, "佰" => 100,
                       "十" => 10, "拾" => 10);

    $cnumber = "" unless defined($cnumber);

    # Normalise compound markers, the measure word and the shorthands
    $cnumber =~ s/万亿/兆/;
    $cnumber =~ s/萬億/兆/;
    $cnumber =~ s/亿万/兆/;
    $cnumber =~ s/億萬/兆/;
    $cnumber =~ s/個//;
    $cnumber =~ s/个//;
    $cnumber =~ s/廿/二十/;
    $cnumber =~ s/卄/二十/;
    $cnumber =~ s/卅/三十/;
    $cnumber =~ s/卌/四十/;

    $cnumlength = lengthu8($cnumber);
    for ($i = 0; $i < $cnumlength; $i++) {
        $cchar = substru8($cnumber, $i, 1);
        if ($i == 0 && ($cchar eq "负" or $cchar eq '負' or $cchar eq '-')) {
            $negative = 1;
        } elsif ($i == 0 && $cchar eq '第') { # ordinal
            # Do nothing, handled elsewhere
        } elsif ($cchar eq '點' or $cchar eq '点' or $cchar eq '.') {
            $afterdecimal = 1;
            $power = -1;
        } elsif (exists($grouppower{$cchar})) {
            # A group marker closes the current group; a bare marker counts as one
            $power = $grouppower{$cchar};
            $leveltotal = 1 if $leveltotal == 0;
            $total += $leveltotal * (10 ** $power);
            $leveltotal = 0;
            $power -= 4;
        } elsif (exists($placevalue{$cchar})) {
            # A place marker without a digit in front of it counts once (十 = 10)
            $leveltotal += $placevalue{$cchar};
        } elsif ($cchar eq "零" or $cchar eq "〇" or $cchar eq "0") {
            if ($afterdecimal) {
                # A zero right after the decimal marker occupies a decimal
                # place; it must not reset the power (一點零五 is 1.05)
                $power--;
            } else {
                $power = 0;
            }
        } elsif (defined($digits{$cchar})) {
            $digitval = $digits{$cchar};
            if ($afterdecimal) {
                # Consume the whole run of decimal digits
                $leveltotal += $digitval * (10 ** $power);
                $power--;
                while ($i+1 < $cnumlength and defined($digits{substru8($cnumber, $i+1, 1)})) {
                    $leveltotal += $digits{substru8($cnumber, $i+1, 1)} * (10 ** $power);
                    $power--;
                    $i++;
                }
            } elsif ($i+1 < $cnumlength) {
                $nextcchar = substru8($cnumber, $i+1, 1);
                if (exists($placevalue{$nextcchar})) {
                    # Digit followed by its place marker: consume both
                    $leveltotal += $digitval * $placevalue{$nextcchar};
                    $i++;
                } elsif (defined($digits{$nextcchar})) {
                    # A run of digits is read positionally
                    $leveltotal *= 10;
                    $leveltotal += $digitval;
                    while ($i+1 < $cnumlength and defined($digits{substru8($cnumber, $i+1, 1)})) {
                        $leveltotal *= 10;
                        $leveltotal += $digits{substru8($cnumber, $i+1, 1)};
                        $i++;
                    }
                } else {
                    $leveltotal += $digitval;
                }
            } else {
                if ($i+1 == $cnumlength and $i > 0) {
                    # Final digit: directly after a marker it counts in the
                    # next lower place
                    my $prevchar = substru8($cnumber, $i-1, 1);
                    if (exists($grouppower{$prevchar})) {
                        $leveltotal += $digitval * (10 ** ($grouppower{$prevchar} - 1));
                    } elsif (exists($placevalue{$prevchar})) {
                        # after 十 this is the plain digit (十五 is 15)
                        $leveltotal += $digitval * ($placevalue{$prevchar} / 10);
                    } else {
                        $leveltotal += $digitval;
                    }
                } else {
                    $leveltotal += $digitval;
                }
            }
        } else {
            print STDERR "Seems to be an error in the number. $cnumber\n";
            return "";
        }
    }

    # Catch remaining leveltotal
    $total += $leveltotal;
    if ($negative == 1) { $total = -$total; }
    return $total;
}
# Get or set the default output type used by EnglishToChineseNumber.
# With an argument, stores it as the new default; in both cases returns the
# default that is now in effect.
sub chinese_output {
    my($self, @option) = @_;
    $default_outputtype = $option[0] if @option;
    return $default_outputtype;
}
# Get or set the default output type used by ChineseToEnglishNumber.
# With an argument, stores it as the new default; in both cases returns the
# default that is now in effect.
sub english_output {
    my($self, @option) = @_;
    $default_englishtype = $option[0] if @option;
    return $default_englishtype;
}
# Count the characters in a UTF-8 encoded byte string.
#
# The string is walked lead byte by lead byte; the lead byte decides how many
# bytes the character occupies (1, 2, 3 or 4).  Any other byte value is
# stepped over as a 3-byte unit.  Returns 0 for an empty or undefined string.
sub lengthu8 {
    my($utfstring) = shift;
    return 0 unless defined($utfstring);
    my($bytelen) = length($utfstring);
    my($i, $charcount) = (0, 0);
    while ($i < $bytelen) {
        my $lead = unpack("C", substr($utfstring, $i, 1));
        if ($lead <= 0x7F) {                # 1 byte long (ASCII)
            $i += 1;
        } elsif (($lead & 0xE0) == 0xC0) {  # 2 bytes long
            $i += 2;
        } elsif (($lead & 0xF8) == 0xF0) {  # 4 bytes long
            $i += 4;
        } else {                            # 3 bytes long
            $i += 3;
        }
        $charcount++;
    }
    return $charcount;
}
# Extract characters from a UTF-8 encoded byte string.
#
# Arguments:
#   utfstring - the UTF-8 byte string
#   start     - zero-based index of the first character wanted
#   span      - number of characters wanted
#
# Returns the bytes of the requested characters.  A span that runs past the
# end of the string yields the characters that are there.  Returns "" when
# start lies at or beyond the end, when start is negative, when span is not
# positive, or when an argument is undefined.  Character widths (1-4 bytes)
# are taken from each lead byte; any other byte value is stepped over as a
# 3-byte unit.
sub substru8 {
    my($utfstring, $start, $span) = @_;
    return "" unless defined($utfstring) && defined($start) && defined($span);
    return "" if $start < 0 || $span <= 0;

    my($bytelen) = length($utfstring);
    my($stop) = $start + $span;   # index of the first character NOT wanted
    my($i, $charcount) = (0, 0);
    my($bytestart, $byteend);
    while ($i < $bytelen) {
        $bytestart = $i if $charcount == $start;
        if ($charcount == $stop) {
            $byteend = $i;
            last;
        }
        my $lead = unpack("C", substr($utfstring, $i, 1));
        if ($lead <= 0x7F) {                # 1 byte long (ASCII)
            $i += 1;
        } elsif (($lead & 0xE0) == 0xC0) {  # 2 bytes long
            $i += 2;
        } elsif (($lead & 0xF8) == 0xF0) {  # 4 bytes long
            $i += 4;
        } else {                            # 3 bytes long
            $i += 3;
        }
        $charcount++;
    }

    # Start index beyond the last character
    return "" unless defined($bytestart);
    # The span reaches (or passes) the end of the string
    $byteend = $bytelen unless defined($byteend);
    return substr($utfstring, $bytestart, $byteend - $bytestart);
}
# Empty END block: nothing is done when the interpreter shuts down.
END { }
# A module file must end with a true value so that use/require succeeds.
1;