KGRKJGETMRETU895U-589TY5MIGM5JGB5SDFESFREWTGR54TY
Server : Apache/2.4.62
System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
User : www ( 80)
PHP Version : 8.3.8
Disable Function : NONE
Directory :  /domains/mandarintools/download/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /domains/mandarintools/download/ChineseNumbers.pm
# -*- coding: cn-big5; -*-
package ChineseNumbers;
require Exporter;
use strict;

use subs qw{EnglishToChineseNumber ChineseToEnglishNumber};

# Author: Erik Peterson
# E-mail: [email protected]
# Source: http://www.mandarintools.com/numbers.html
#
# Usage:
#
# use ChineseNumbers;
#
# ChineseNumbers->EnglishToChineseNumber(enumber, [output_type])
#   enumber is an integer
#   output_type (which is optional) can be
#     big5      : Output Chinese using Big5
#     formalb5  : Output as formal numbers in Big5
#     gb        : Output using GB
#     formalgb  : Output as formal numbers in GB (not working yet; currently emits ordinary GB numerals)
#     utf8      : Output as (traditional character) UTF-8
#     unicodehex: Output as 4-digit Unicode hex blocks
#     pinyin    : Output as Hanyu Pinyin
#     jyutpin   : Output as Cantonese jyutpin romanization
#     yalecant  : Output as Cantonese Yale romanization
#    The default is big5
#
# ChineseNumbers->ChineseToEnglishNumber(cnumber, [input_type])
#   cnumber is a string in GB, Big5, or UTF-8
#   input_type is "big5", "gb", or "utf8", depending on cnumber
#    default is "big5"
#
# ChineseNumbers->chinese_output([option])
#   Set the default output type used by EnglishToChineseNumber
#    option can be any of the output options for EnglishToChineseNumber
#    If no arguments, returns the current default
#
# ChineseNumbers->chinese_input([option])
#   Set the default input type used by ChineseToEnglishNumber
#    option can be "big5", "gb", or "utf8"
#    If no arguments, returns the current default


# No compile-time initialisation is needed; the block is intentionally empty.
BEGIN { }

# NOTE(review): every non-ASCII literal below is expected to be a two-byte
# Big5 character (the conversion code slices strings two bytes at a time).
# In this copy of the file several distinct characters render identically,
# which suggests the file was damaged by an encoding conversion -- verify
# against a pristine Big5 copy before relying on these tables.

# Character prepended to the result when the input number is negative.
my $minus = "�t";

# Digit characters indexed by their numeric value (0 through 9).
my @digits = ("�s", "�@", "�G", "�T", "�|", "��", "��", "�C", "�K", "�E"); 

# Reverse lookup: digit character => numeric value.  Two different
# characters map to 2 (the second is presumably the alternate form of
# "two" also stored in $ALTTWO -- TODO confirm).
my %digits = ("�s", 0,
	      "�@", 1,
	      "�G", 2, 
	      "��", 2, 
	      "�T", 3,
	      "�|", 4,
	      "��", 5,
	      "��", 6,
	      "�C", 7,
	      "�K", 8,
	      "�E", 9); 

# Unit characters for 10, 100 and 1000 inside one group of four decimal
# places; EnglishToChineseNumber indexes this with (power % 4) - 1.
my @beforeWan = ("�Q", "��", "�d"); 
# Unit characters for each group of four decimal places (10**0, 10**4,
# 10**8, ...); EnglishToChineseNumber indexes this with power / 4.
my @afterWan = ("", "�U", "��", "��", "��"); 

# Unit character => value for the in-group units.  No subroutine visible in
# this file reads this hash.
my %beforeWan = ("�Q", 10,
		 "��", 100,
		 "�d", 1000); 

# Unit character => value for the group units.  No subroutine visible in
# this file reads this hash.
my %afterWan = ("�U", 10000,
		"��", 100000000,
		"��", 1000000000000,
		"��", 10000000000000000); 

# Alternate character for "two"; only referenced from commented-out code in
# EnglishToChineseNumber.
my $ALTTWO = "��";
# Numeric base used when splitting a number into per-power coefficients.
my $TEN = 10;

# Module-wide defaults, read and changed through chinese_output() and
# chinese_input().
my $default_outputtype = "big5";
my $default_inputtype = "big5";

# Big5 (traditional) character => GB (simplified) character.  Used by
# EnglishToChineseNumber for the "gb" and "formalgb" output types.
# NOTE(review): the literals appear mis-encoded in this copy of the file
# (distinct characters render identically) -- verify against a pristine copy.
my %trad2simp = ("�t" => "��", 
		 "�s" => "��", 
		 "�@" => "һ",
		 "�G" => "��",
		 "�T" => "�", 
		 "�|" => "��",
		 "��" => "��",
		 "��" => "��",
		 "�C" => "��",
		 "�K" => "��",
		 "�E" => "��",
		 "�Q" => "ʮ",
		 "��" => "��",
		 "�d" => "ǧ", 
		 "�U" => "��",
		 "��" => "��",
		 "��" => "��", 
		 "��" => "��");


# GB (simplified) character => Big5 (traditional) character; the inverse of
# %trad2simp.  Used by ChineseToEnglishNumber for "gb" input.
my %simp2trad = ("��" => "�t", 
		 "��" => "�s", 
		 "һ" => "�@",
		 "��" => "�G",
		 "�" => "�T", 
		 "��" => "�|",
		 "��" => "��",
		 "��" => "��",
		 "��" => "�C",
		 "��" => "�K",
		 "��" => "�E",
		 "ʮ" => "�Q",
		 "��" => "��",
		 "ǧ" => "�d", 
		 "��" => "�U",
		 "��" => "��",
		 "��" => "��", 
		 "��" => "��");


# Big5 everyday numeral => Big5 formal numeral.  Used by
# EnglishToChineseNumber for the "formalb5" output type.  Some entries map a
# character to itself.
# NOTE(review): the literals appear mis-encoded in this copy of the file
# (distinct characters render identically) -- verify against a pristine copy.
my %trad2formal = ("�t" => "�t", 
		   "�s" => "�s", 
		   "�@" => "��",
		   "�G" => "�L",
		   "�T" => "��", 
		   "�|" => "�v",
		   "��" => "��",
		   "��" => "��",
		   "�C" => "�m",
		   "�K" => "��",
		   "�E" => "�h",
		   "�Q" => "�B",
		   "��" => "��",
		   "�d" => "�a", 
		   "�U" => "�U",
		   "��" => "��",
		   "��" => "��", 
		   "��" => "��");

# Placeholder for Big5 numeral => GB formal numeral.  Every value is still
# the empty string, and the only reference to this hash in
# EnglishToChineseNumber is commented out.
my %trad2formalgb = ("�t" => "", 
		   "�s" => "", 
		   "�@" => "",
		   "�G" => "",
		   "�T" => "", 
		   "�|" => "",
		   "��" => "",
		   "��" => "",
		   "�C" => "",
		   "�K" => "",
		   "�E" => "",
		   "�Q" => "",
		   "��" => "",
		   "�d" => "", 
		   "�U" => "",
		   "��" => "",
		   "��" => "", 
		   "��" => "");


# Big5 character => Unicode code point as four upper-case hex digits.  Used
# by EnglishToChineseNumber for the "utf8" and "unicodehex" output types.
# NOTE(review): the Big5 literals appear mis-encoded in this copy of the
# file (distinct characters render identically) -- verify against a
# pristine copy.
my %trad2unicode = ("�t" => "8CA0", 
		    "�s" => "96F6", 
		    "�@" => "4E00",
		    "�G" => "4E8C",
		    "�T" => "4E09", 
		    "�|" => "56DB",
		    "��" => "4E94",
		    "��" => "516D",
		    "�C" => "4E03",
		    "�K" => "516B",
		    "�E" => "4E5D",
		    "�Q" => "5341",
		    "��" => "767E",
		    "�d" => "5343", 
		    "�U" => "842C",
		    "��" => "5104",
		    "��" => "5146", 
		    "��" => "5169");

# Unicode code point (four upper-case hex digits) => Big5 character.  Used by
# ChineseToEnglishNumber for "utf8" input.  Entries marked "simp" are the
# simplified-character code points, mapped onto the same Big5 character as
# their traditional counterparts.
my %unicode2trad = ("8CA0" => "�t", 
		    "8D1F" => "�t", # simp
		    "96F6" => "�s", 
		    "4E00" => "�@",
		    "4E8C" => "�G",
		    "4E09" => "�T", 
		    "56DB" => "�|",
		    "4E94" => "��",
		    "516D" => "��",
		    "4E03" => "�C",
		    "516B" => "�K",
		    "4E5D" => "�E",
		    "5341" => "�Q",
		    "767E" => "��",
		    "5343" => "�d", 
		    "842C" => "�U",
		    "4E07" => "�U", # simp
		    "5104" => "��",
		    "4EBF" => "��", # simp
		    "5146" => "��", 
		    "5169" => "��", # traditional form (matches %trad2unicode)
		    "4E24" => "��"); # simp


# Big5 character => Hanyu Pinyin syllable with a trailing tone digit.  Used
# by EnglishToChineseNumber for the "pinyin" output type.
# NOTE(review): the Big5 keys appear mis-encoded in this copy of the file
# (distinct characters render identically) -- verify against a pristine copy.
my %trad2pinyin = ("�t" => "fu4", 
		   "�s" => "ling2", 
		   "�@" => "yi1",
		   "�G" => "er4",
		   "�T" => "san1", 
		   "�|" => "si4",
		   "��" => "wu3",
		   "��" => "liu4",
		   "�C" => "qi1",
		   "�K" => "ba1",
		   "�E" => "jiu3",
		   "�Q" => "shi2",
		   "��" => "bai3",
		   "�d" => "qian1", 
		   "�U" => "wan4",
		   "��" => "yi4",
		   "��" => "zhao4", 
		   "��" => "liang3");

# Big5 character => Cantonese Yale romanization.  Used by
# EnglishToChineseNumber for the "yalecant" output type.
# NOTE(review): some entries carry a tone digit and others do not, so the
# table looks incomplete or inconsistent -- verify against a reference.
my %trad2yalecant = ("�t" => "fu", 
		   "�s" => "ling2", 
		   "�@" => "yat",
		   "�G" => "yih7",
		   "�T" => "saam1", 
		   "�|" => "sei5",
		   "��" => "ng4",
		   "��" => "luhk",
		   "�C" => "chat1",
		   "�K" => "baat1",
		   "�E" => "gao3",
		   "�Q" => "sap7",
		   "��" => "baak5",
		   "�d" => "chin1", 
		   "�U" => "maahn",
		   "��" => "yik1",
		   "��" => "siu", 
		   "��" => "leung4");


# Big5 character => Cantonese jyutpin romanization with a trailing tone
# digit.  Used by EnglishToChineseNumber for the "jyutpin" output type.
my %trad2jyutpin = ("�t" => "fu6", 
		   "�s" => "ling4", 
		   "�@" => "jat1",
		   "�G" => "ji6",
		   "�T" => "saam1", 
		   "�|" => "sei3",
		   "��" => "ng5",
		   "��" => "luk6",
		   "�C" => "cat1",
		   "�K" => "baat3",
		   "�E" => "gau2",
		   "�Q" => "sap6",
		   "��" => "baak3",
		   "�d" => "cin1", 
		   "�U" => "maan6",
		   "��" => "jik1",
		   "��" => "siu6", 
		   "��" => "loeng5");



# new: construct an (empty) converter object.
#
# May be called as a class method (Class->new), as an instance method
# ($obj->new) or as a plain function.  The object is blessed into the
# invocant's class so that subclasses get instances of their own class; when
# no invocant is supplied it falls back to this package.
sub new {
    my($class) = @_;
    $class = ref($class) || $class || __PACKAGE__;
    return bless {}, $class;
}


# EnglishToChineseNumber: the heart of the module.  Converts an integer into
# a Chinese numeral string.
#
# Arguments (after the invocant, which is not used):
#   enumber    - the number to convert; every character other than a digit,
#                "." or "-" is stripped before conversion.
#   outputtype - optional, case-insensitive output format: big5, gb,
#                formalb5, formalgb, utf8, unicodehex, pinyin, jyutpin or
#                yalecant.  Defaults to the value set via chinese_output().
#                An unrecognised type yields the Big5 string unchanged.
#
# Returns the converted string.  The unicodehex and romanized formats emit
# one token per character, each followed by a single space.
#
# The number is first built as a string of two-byte Big5 characters and then
# mapped, two bytes at a time, into the requested format.  Zero goes through
# the same mapping step, so it is rendered in the requested format too
# (earlier it was always returned as the raw Big5 digit).
sub EnglishToChineseNumber {
    my($self) = shift;
    my($enumber) = shift;
    my($outputtype) = shift;
    if (!defined($outputtype) || $outputtype eq "") {
        $outputtype = $default_outputtype;
    }
    $outputtype = lc($outputtype);

    my(@powers) = ();      # $powers[$n] is the coefficient of 10**$n
    my($power) = 0;        # number of decimal places found in the number
    my($negative) = 0;     # is it a negative integer?
    my($inzero) = 0;       # are we inside a stretch of one or more zeros?
    my($canaddzero) = 0;   # only add a zero with something non-zero on both sides
    my($cnumber) = "";     # the number as Big5 characters

    # Remove everything that is not a digit, a decimal point or a minus sign
    $enumber =~ s/[^0-9\.-]//g;

    if ($enumber == 0) {
        # Zero has no decomposition; it is just the zero digit.
        $cnumber = $digits[0];
    } else {
        # Remember the sign and continue with the absolute value
        if ($enumber < 0) {
            $negative = 1;
            $enumber = -$enumber;
        }

        # Get the value of the coefficient for each power of ten
        while ($TEN ** $power <= $enumber) {
            $powers[$power] = ($enumber % ($TEN ** ($power + 1))) / ($TEN ** $power);

            # Subtract out the current power's coefficient and move up a power
            $enumber -= $enumber % ($TEN ** ($power + 1));
            $power++;
        }

        # Walk the coefficients from the lowest power upwards, prepending
        # the Chinese text for each one.
        for (my $i = 0; $i < $power; $i++) {
            if (($i % 4) == 0) {  # First place of the next group of four powers
                if ($powers[$i] != 0) {
                    $inzero = 0;
                    $canaddzero = 1;
                    $cnumber = $digits[$powers[$i]] . $afterWan[$i/4] . $cnumber;
                } elsif ((($i+3 < $power) && $powers[$i+3] != 0) ||
                         (($i+2 < $power) && $powers[$i+2] != 0) ||
                         (($i+1 < $power) && $powers[$i+1] != 0)) {
                    # The group unit is still needed when any of the next
                    # three places is non-zero.
                    $cnumber = $afterWan[$i/4] . $cnumber;
                    $canaddzero = 0;
                }
            } elsif ($powers[$i] != 0) {  # Tens, hundreds or thousands place
                $inzero = 0;
                $canaddzero = 1;
                if ($power == 2 && $i == 1 && $powers[$i] == 1) {
                    # 10 through 19 are written without a leading "one"
                    $cnumber = $beforeWan[($i % 4) - 1] . $cnumber;
                } else {
                    $cnumber = $digits[$powers[$i]] . $beforeWan[($i % 4) - 1] . $cnumber;
                }
            } elsif ($canaddzero == 1 && $inzero == 0) {
                # Only insert one zero character for a run of consecutive zeros
                $inzero = 1;
                $cnumber = $digits[$powers[$i]] . $cnumber;
            }
        }

        # Add the negative character
        if ($negative == 1) {
            $cnumber = $minus . $cnumber;
        }
    }

    # Choose the lookup table, the separator appended after every character
    # and whether the looked-up value still has to be encoded as UTF-8.
    my($table, $separator, $toutf8) = (undef, "", 0);
    if ($outputtype eq "gb" || $outputtype eq "formalgb") {
        # formalgb is not implemented yet and falls back to plain GB
        $table = \%trad2simp;
    } elsif ($outputtype eq "formalb5") {
        $table = \%trad2formal;
    } elsif ($outputtype eq "utf8") {
        $table = \%trad2unicode;
        $toutf8 = 1;
    } elsif ($outputtype eq "unicodehex") {
        $table = \%trad2unicode;
        $separator = " ";
    } elsif ($outputtype eq "pinyin") {
        $table = \%trad2pinyin;
        $separator = " ";
    } elsif ($outputtype eq "jyutpin") {
        $table = \%trad2jyutpin;
        $separator = " ";
    } elsif ($outputtype eq "yalecant") {
        $table = \%trad2yalecant;
        $separator = " ";
    } else {
        # "big5" and every unrecognised type: no mapping needed
        return $cnumber;
    }

    # Map the Big5 string one two-byte character at a time
    my($result);
    for (my $j = 0; $j < length($cnumber); $j += 2) {
        my $mapped = $table->{substr($cnumber, $j, 2)};
        $mapped = hex2utf8($mapped) if $toutf8;
        $result .= $mapped . $separator;
    }

    return $result;
}


# ChineseToEnglishNumber: convert a Chinese numeral string into a number.
#
# Arguments (after the invocant, which is not used):
#   inputnumber - the numeral as a byte string
#   inputtype   - optional: "gb", "utf8", or anything else for Big5; when
#                 omitted the default set via chinese_input() is used
#
# GB and UTF-8 input is first translated into Big5.  A string longer than
# one character that consists solely of digit characters is read
# positionally by ChineseToEnglishBrief; everything else is parsed by
# ChineseToEnglishFull.
sub ChineseToEnglishNumber {
    my ($self, $inputnumber, @rest) = @_;
    my $inputtype = @rest ? $rest[0] : $default_inputtype;

    # Normalise the input to a string of two-byte Big5 characters.
    my $cnumber;
    if ($inputtype eq "gb") {
        # GB characters are two bytes wide.
        my $offset = 0;
        while ($offset < length($inputnumber)) {
            $cnumber .= $simp2trad{substr($inputnumber, $offset, 2)};
            $offset += 2;
        }
    } elsif ($inputtype eq "utf8") {
        # Every supported character is three bytes wide in UTF-8.
        my $offset = 0;
        while ($offset < length($inputnumber)) {
            $cnumber .= $unicode2trad{utf82hex(substr($inputnumber, $offset, 3))};
            $offset += 3;
        }
    } else {
        $cnumber = $inputnumber;
    }

    # A single character (or nothing at all) always takes the full parser.
    return ChineseToEnglishFull($cnumber) unless length($cnumber) > 2;

    # Any character that is not a plain digit forces the full parser.
    for (my $offset = 0; $offset < length($cnumber); $offset += 2) {
        return ChineseToEnglishFull($cnumber)
            unless defined($digits{substr($cnumber, $offset, 2)});
    }

    return ChineseToEnglishBrief($cnumber);
}


# ChineseToEnglishBrief: read a string made only of digit characters as a
# positional decimal number (first character is the most significant digit).
#
#   cnumber - string of two-byte digit characters found in %digits
#
# Returns the numeric value; an empty string yields 0.
sub ChineseToEnglishBrief {
    my ($cnumber) = @_;
    my $ndigits = length($cnumber) / 2;   # two bytes per character
    my $total = 0;

    my $place = 0;
    while ($place < $ndigits) {
        my $digitval = $digits{substr($cnumber, 2 * $place, 2)};
        # Weight the digit by the power of ten of its position.
        $total += $digitval * (10 ** ($ndigits - 1 - $place));
        $place++;
    }

    return $total;
}

# ChineseToEnglishFull: parse a Chinese numeral written with unit characters
# and return its numeric value.
#
#   cnumber - the numeral as a string of two-byte characters
#
# The string is scanned left to right, two bytes at a time.  Digits and the
# in-group units (10, 100, 1000) accumulate into $leveltotal; each group unit
# (10**4, 10**8, 10**12) multiplies the accumulated $leveltotal into $total.
# On an unrecognised character a message is printed to STDERR and the empty
# string is returned.
#
# NOTE(review): the two-byte literals compared against $cchar appear
# mis-encoded in this copy of the file (distinct characters render
# identically), so several branches look like duplicates -- verify against a
# pristine Big5 copy.
sub ChineseToEnglishFull {
    my($cnumber) = shift;
    my($negative) = 0;                    # set when the string starts with the minus character
    my($cnumlength) = length($cnumber);
    my($i);
    my($j, $digitval, $cchar);
    my($power) = 0;                       # exponent applied to whatever is left in $leveltotal at the end
    my($leveltotal) = 0;                  # value accumulated since the last group unit
    my($total);
    my($nextcchar);

    #print "In Chinese to English Full<BR>";
    for ($i = 0; $i < $cnumlength; $i+=2) {
	#print "$i ";
	$cchar = substr($cnumber, $i, 2);
	#print "$cchar $leveltotal $power<BR>";
	# NOTE(review): inside this loop $cchar is never empty, so the ""
	# comparison cannot match; presumably a second minus character was
	# lost here -- verify.
	if ($i == 0 && ($cchar eq "" || $cchar eq '�t')) {
	    $negative = 1;
	} elsif ($i == 0 && $cchar eq '��') {
	    # Do nothing for now
	} elsif ($cchar eq '��') {
	    # Group unit for 10**12: flush the current level into the total.
	    $power = 12;
	    $leveltotal = 1 if $leveltotal == 0;   # a bare unit counts as one of that unit
	    $total += $leveltotal * (10 ** $power);
	    $leveltotal = 0;
	    $power -= 4;
	    #$power = 0;
	} elsif ($cchar eq '��') {
	    # Group unit for 10**8.
	    $power = 8;
	    $leveltotal = 1 if $leveltotal == 0;
	    $total += $leveltotal * (10** $power);
	    $leveltotal = 0;
	    $power -= 4;
	    #$power = 0;
	} elsif ($cchar eq '�U') {
	    # Group unit for 10**4.
	    $power = 4;
	    $leveltotal = 1 if $leveltotal == 0;
	    $total += $leveltotal * (10**$power);
	    $leveltotal = 0;
	    $power -= 4;
	    #$power = 0;
	} elsif ($cchar eq '�d') {
	    # An in-group unit with no digit in front of it counts once.
	    $leveltotal += 1000;
	} elsif ($cchar eq "��") {
	    $leveltotal += 100;
	} elsif ($cchar eq "�Q") {
	    $leveltotal += 10;
	} elsif ($cchar eq "�s") {
	    # A zero resets the exponent, so what follows is added at face value.
	    $power = 0;
	# The first alternative below repeats the comparison of the branch
	# above and is therefore never reached from here.
	} elsif ($cchar eq "�s" ||
		 $cchar eq "�@" ||
		 $cchar eq "��" ||
		 $cchar eq "�G" ||
		 $cchar eq "�T" ||
		 $cchar eq "�|" || 
		 $cchar eq "��" ||
		 $cchar eq "��" || 
		 $cchar eq "�C" ||
		 $cchar eq "�K" ||
		 $cchar eq "�E") {
	    
	    $digitval = $digits{substr($cnumber, $i, 2)};
	    #print "Digit val is $digitval<BR>\n";
		
	    # Look one character ahead: a digit directly followed by an
	    # in-group unit is multiplied by it, and the unit is consumed by
	    # advancing $i past it.
	    if ($i+2 < $cnumlength) {
		$nextcchar = substr($cnumber, $i+2, 2);
		if ($nextcchar eq "�Q") {
		    $leveltotal += $digitval * 10;
		    $i+=2;
		} elsif ($nextcchar eq "��") {
		    $leveltotal += $digitval * 100;
		    $i+=2;
		} elsif ($nextcchar eq "�d") {
		    $leveltotal += $digitval * 1000;
		    $i+=2;
		} else {
		    $leveltotal += $digitval;
		}
	    } else {
		# Last character of the string: a plain ones digit.
		$leveltotal += $digitval;
	    }
	} else {
	    print STDERR "Seems to be an error in the number. $cnumber\n";
	    return "";
	    # return negative infinity;
	}
	
    }

    # Catch remaining leveltotal
    #print("Level total " + $leveltotal + " power " + $power + " ten to power " + (10**$power)/10);
    $total += $leveltotal * 10** $power;
    
    if ($negative == 1) { $total = -$total; }
    
    return $total;
}

# chinese_output: get or set the default output type used by
# EnglishToChineseNumber.
#
# With an argument (after the invocant) the default is replaced by it; in
# either case the current default is returned.
sub chinese_output {
    my ($self, @args) = @_;
    $default_outputtype = $args[0] if @args;
    return $default_outputtype;
}

# chinese_input: get or set the default input type used by
# ChineseToEnglishNumber.
#
# With an argument (after the invocant) the default is replaced by it; in
# either case the current default is returned.
sub chinese_input {
    my ($self, @args) = @_;
    $default_inputtype = $args[0] if @args;
    return $default_inputtype;
}


# hex2utf8:  Take a Unicode code point written as hex digits (0-9A-F, with
# or without a leading "0x") and return its UTF-8 encoding as a byte string.
#
#   hexchar - the code point, e.g. "4E00" or "0x4E00"
#
# Returns 1 byte for code points up to 7F, 2 bytes up to 7FF, 3 bytes up to
# FFFF and 4 bytes up to 10FFFF.  (Earlier, anything above FFFF overflowed
# the three-byte form and produced garbage.)  Values above 10FFFF are not
# valid Unicode; their lead byte is masked so that it still fits in a byte,
# but the result is not meaningful.

sub hex2utf8 {
    my($hexchar) = shift;
    my($binchar, $retval);

    # oct() only treats the string as hexadecimal when it starts with "0x"
    if ($hexchar !~ m/^0x/) {
        $hexchar = "0x" . $hexchar;
    }
    $binchar = oct($hexchar);

    if ($binchar <= 0x7F) {
        # 0xxxxxxx
        $retval = pack("C", $binchar);
    } elsif ($binchar <= 0x7FF) {
        # 110xxxxx 10xxxxxx
        $retval = pack("C2",
                       0xC0 | ($binchar >> 6),
                       0x80 | ($binchar & 0x3F));
    } elsif ($binchar <= 0xFFFF) {
        # 1110xxxx 10xxxxxx 10xxxxxx
        $retval = pack("C3",
                       0xE0 | ($binchar >> 12),
                       0x80 | (($binchar >> 6) & 0x3F),
                       0x80 | ($binchar & 0x3F));
    } else {
        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $retval = pack("C4",
                       0xF0 | (($binchar >> 18) & 0x07),
                       0x80 | (($binchar >> 12) & 0x3F),
                       0x80 | (($binchar >> 6) & 0x3F),
                       0x80 | ($binchar & 0x3F));
    }
    return $retval;
}


# utf82hex: return the code point of the FIRST UTF-8 character of a byte
# string as four upper-case hex digits (e.g. "4E00").
#
#   utfstring - UTF-8 encoded bytes; anything after the first character
#               does not affect the result
#
# An empty string yields "".  Only 1-, 2- and 3-byte sequences are decoded;
# any lead byte that is neither ASCII nor a 2-byte lead is treated as the
# start of a 3-byte sequence.
sub utf82hex {
    my ($utfstring) = @_;

    # Nothing to decode.
    return "" unless length($utfstring);

    my $lead = unpack("C", substr($utfstring, 0, 1));
    my $codepoint;

    if ($lead <= 0x7F) {
        # 1 byte long (ASCII)
        $codepoint = $lead;
    } elsif (($lead & 0xE0) == 0xC0) {
        # 2 bytes long: 5 payload bits, then 6
        my $cont = unpack("C", substr($utfstring, 1, 1));
        $codepoint = (($lead & 0x1F) << 6) | ($cont & 0x3F);
    } else {
        # 3 bytes long: 4 payload bits, then 6, then 6
        my $cont1 = unpack("C", substr($utfstring, 1, 1));
        my $cont2 = unpack("C", substr($utfstring, 2, 1));
        $codepoint = ((0x0F & $lead) << 12)
                   | ((0x3F & $cont1) << 6)
                   | (0x3F & $cont2);
    }

    # High byte then low byte, two hex digits each.
    return sprintf("%02X%02X", ($codepoint & 0xFF00) >> 8, $codepoint & 0x00FF);
}


# No teardown work is needed; the block is intentionally empty.
END { }


# A module file must end with a true value so that require/use succeeds.
1;

Anon7 - 2021