|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/pyconv/ |
Upload File : |
import java.lang.*;
import java.io.*;
import java.util.*;
/**
 * Converts romanized Mandarin text from one romanization system to another.
 *
 * <p>A conversion table is read with one syllable per line and one
 * whitespace-separated column per romanization system, in the order of the
 * system constants below. A column may list several comma-separated spellings;
 * all of them are recognized on input, and the first one is used on output.
 * Lines whose first character is {@code #} are comments.
 *
 * <p>For every system the recognized spellings are stored in a character trie
 * built from nested {@code Hashtable}s: each one-character {@code String} key
 * maps to the child node, and the key {@code "EOW"} maps to the
 * {@code Integer} row of {@link #pyTable} for the spelling ending at that node.
 */
class pyConvert {
    /** Column/system index for Hanyu Pinyin (command-line letter {@code p}). */
    static public final int PY = 0;
    /** Column/system index for Wade-Giles (command-line letter {@code w}). */
    static public final int WG = 1;
    /** Column/system index for Yale (command-line letter {@code y}). */
    static public final int YALE = 2;
    /** Column/system index for the system the usage text calls "GuoinII" (letter {@code i}). */
    static public final int GI = 3;
    /** Column/system index, presumably Bopomofo; not selectable from the command line. */
    static public final int BPMF = 4;
    /** Column/system index for Gwoyeu Romatzyh (command-line letter {@code g}). */
    static public final int GR = 5;
    /** Column/system index for the French system (command-line letter {@code f}). */
    static public final int FR = 6;
    /** Number of romanization systems, i.e. the number of table columns used. */
    static public final int TOTALPY = 7;

    /** Name of the bundled conversion table, resolved relative to this class. */
    private static final String TABLE_RESOURCE = "zh_old.txt";
    /** Character encoding of the bundled conversion table. */
    private static final String TABLE_ENCODING = "Big5";
    /** Initial number of table rows; the table grows if the data has more. */
    private static final int INITIAL_ROWS = 2036;
    /** Trie key marking the end of a complete spelling. */
    private static final String END_OF_WORD = "EOW";

    /** One trie of recognized spellings per romanization system. */
    final Hashtable[] chartree;
    /** Output spellings, indexed as {@code [row][system]}; cells may be null. */
    String[][] pyTable;
    /** Syllable separator per system; only the {@link #PY} entry is set. */
    String[] pySeparator;
    /** Per system, the characters that may be part of a romanized word. */
    String[] pyChars;

    /**
     * Creates a converter backed by the bundled table resource. If the table is
     * missing or unreadable, the problem is reported on standard error and the
     * converter is left with whatever was loaded (possibly nothing).
     */
    public pyConvert() {
        chartree = newTries();
        initCharsets();
        load_table();
    }

    /**
     * Creates a converter from a table supplied by the caller instead of the
     * bundled resource. Read errors are reported on standard error, as they
     * are for the bundled table. The reader is not closed.
     *
     * @param tableSource source of table text in the format described on the class
     */
    pyConvert(Reader tableSource) {
        chartree = newTries();
        initCharsets();
        try {
            readTable(new BufferedReader(tableSource));
        } catch (IOException e) {
            System.err.println("IOException:" + e);
        }
    }

    /** Returns one empty trie root for each romanization system. */
    private static Hashtable[] newTries() {
        Hashtable[] tries = new Hashtable[TOTALPY];
        for (int system = 0; system < TOTALPY; system++) {
            tries[system] = new Hashtable();
        }
        return tries;
    }

    /** Sets up the separator list, the per-system character sets and the empty table. */
    private void initCharsets() {
        pySeparator = new String[TOTALPY];
        pySeparator[PY] = "'";
        pyChars = new String[TOTALPY];
        pyChars[PY] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstu\u00FCvwxyz:1234";
        pyChars[WG] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstu\u00FCvwxyz^\':1234";
        pyChars[YALE] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234";
        pyChars[GI] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstu\u00FCvwxyz1234";
        // NOTE(review): this literal looks like non-ASCII text that was damaged by a
        // wrong character encoding somewhere upstream; it is kept exactly as found.
        // Restore it from a correctly encoded copy of the file if one exists.
        pyChars[BPMF] = "�t�u�v�w�x�y�z�{�|�}�~�����������������������������������������@��������������";
        pyChars[GR] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.";
        pyChars[FR] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno\u00F6pqrstuvwxyz^\':1234";
        pyTable = new String[INITIAL_ROWS][TOTALPY];
    }

    /**
     * Loads the bundled conversion table. Failures are reported on standard
     * error (standard output carries converted text) and are not rethrown.
     */
    private void load_table() {
        InputStream pydata = getClass().getResourceAsStream(TABLE_RESOURCE);
        if (pydata == null) {
            // getResourceAsStream signals a missing resource with null, not an exception.
            System.err.println("Romanization table not found: " + TABLE_RESOURCE);
            return;
        }
        try {
            try {
                readTable(new BufferedReader(new InputStreamReader(pydata, TABLE_ENCODING)));
            } finally {
                pydata.close();
            }
        } catch (IOException e) {
            System.err.println("IOException:" + e);
        }
    }

    /**
     * Reads table lines into {@link #pyTable} and the tries, starting at row 0.
     * Comment lines and lines without any token do not consume a row; columns
     * beyond {@link #TOTALPY} are ignored.
     *
     * @param in table text
     * @throws IOException if reading fails
     */
    private void readTable(BufferedReader in) throws IOException {
        int row = 0;
        String dataline;
        while ((dataline = in.readLine()) != null) {
            if (dataline.startsWith("#")) {
                continue;
            }
            StringTokenizer columns = new StringTokenizer(dataline);
            if (!columns.hasMoreTokens()) {
                continue;
            }
            ensureRowCapacity(row);
            for (int system = 0; system < TOTALPY && columns.hasMoreTokens(); system++) {
                String cell = columns.nextToken();
                // The first spelling of a column is the one written on output.
                int comma = cell.indexOf(',');
                pyTable[row][system] = (comma == -1) ? cell : cell.substring(0, comma);
                // Every spelling of the column is accepted on input.
                StringTokenizer variants = new StringTokenizer(cell, ",");
                while (variants.hasMoreTokens()) {
                    addToTrie(chartree[system], variants.nextToken(), row);
                }
            }
            row++;
        }
    }

    /** Doubles the row capacity of {@link #pyTable} when {@code row} does not fit. */
    private void ensureRowCapacity(int row) {
        if (row < pyTable.length) {
            return;
        }
        String[][] grown = new String[pyTable.length * 2][];
        System.arraycopy(pyTable, 0, grown, 0, pyTable.length);
        for (int r = pyTable.length; r < grown.length; r++) {
            grown[r] = new String[TOTALPY];
        }
        pyTable = grown;
    }

    /** Inserts {@code word} into the trie and marks its last node with {@code row}. */
    private static void addToTrie(Hashtable root, String word, int row) {
        Hashtable node = root;
        for (int k = 0; k < word.length(); k++) {
            String key = word.substring(k, k + 1);
            Hashtable child = (Hashtable) node.get(key);
            if (child == null) {
                child = new Hashtable();
                node.put(key, child);
            }
            node = child;
        }
        node.put(END_OF_WORD, new Integer(row));
    }

    /**
     * Follows {@code pysrc} character by character from {@code root} and returns
     * the node reached when the text ends or no child exists for the next character.
     */
    private static Hashtable walk(Hashtable root, String pysrc) {
        Hashtable node = root;
        for (int k = 0; k < pysrc.length(); k++) {
            Hashtable child = (Hashtable) node.get(pysrc.substring(k, k + 1));
            if (child == null) {
                break;
            }
            node = child;
        }
        return node;
    }

    /**
     * Looks up the table row of a spelling.
     *
     * <p>The trie is followed as far as {@code pysrc} allows; the result is the
     * row stored at the node where that walk stops, so trailing characters that
     * cannot be followed are ignored.
     *
     * @param pytype system whose trie is searched
     * @param pysrc  spelling to look up
     * @return the row index, or -1 if the node reached does not end a spelling
     */
    int py_lookup(int pytype, String pysrc) {
        Integer row = (Integer) walk(chartree[pytype], pysrc).get(END_OF_WORD);
        return (row == null) ? -1 : row.intValue();
    }

    /**
     * Looks up a spelling and returns its counterpart in another system, using
     * the same walk as {@link #py_lookup(int, String)}.
     *
     * @param pytypein  system of {@code pysrc}
     * @param pytypeout system of the result
     * @param pysrc     spelling to look up
     * @return the table cell for the row found (may be null), or null if no row is found
     */
    String py_lookup(int pytypein, int pytypeout, String pysrc) {
        Integer row = (Integer) walk(chartree[pytypein], pysrc).get(END_OF_WORD);
        return (row == null) ? null : pyTable[row.intValue()][pytypeout];
    }

    /**
     * Converts a run of romanization characters, which may hold several
     * syllables written together.
     *
     * <p>The text is lower-cased, then scanned left to right. At each position
     * the longest spelling known to the source system is replaced by its
     * target-system spelling; if the table has no target spelling for that
     * row, the matched text is kept. A character that starts no known spelling
     * is copied as is and scanning resumes after it.
     *
     * @param pytypein  system of {@code pysrc}
     * @param pytypeout system to convert to
     * @param pysrc     text to convert
     * @return the converted text (lower-cased where it was not converted)
     */
    String pyConvertString(int pytypein, int pytypeout, String pysrc) {
        StringBuffer pytarget = new StringBuffer();
        // Fixed locale: the default-locale form mis-maps ASCII letters in some locales.
        String lowered = pysrc.toLowerCase(Locale.ENGLISH);
        int pos = 0;
        while (pos < lowered.length()) {
            Hashtable node = chartree[pytypein];
            Integer matchRow = null;
            int matchEnd = pos;
            int scan = pos;
            while (scan < lowered.length()) {
                Hashtable child = (Hashtable) node.get(lowered.substring(scan, scan + 1));
                if (child == null) {
                    break;
                }
                node = child;
                scan++;
                Integer row = (Integer) node.get(END_OF_WORD);
                if (row != null) {
                    // Remember the longest complete spelling seen so far.
                    matchRow = row;
                    matchEnd = scan;
                }
            }
            if (matchRow == null) {
                pytarget.append(lowered.charAt(pos));
                pos++;
            } else {
                String converted = pyTable[matchRow.intValue()][pytypeout];
                if (converted == null) {
                    converted = lowered.substring(pos, matchEnd);
                }
                pytarget.append(converted);
                // Resume right after the match, even if the scan ran further ahead.
                pos = matchEnd;
            }
        }
        return pytarget.toString();
    }

    /**
     * Converts one line of text. Maximal runs of characters belonging to the
     * source system's character set go through {@link #pyConvertString};
     * everything else is copied unchanged.
     *
     * @param pytypein  system of the romanized text in {@code srcline}
     * @param pytypeout system to convert to
     * @param srcline   line to convert
     * @return the converted line
     */
    public String pyConvertLine(int pytypein, int pytypeout, String srcline) {
        String alphabet = pyChars[pytypein];
        StringBuffer targetline = new StringBuffer(srcline.length());
        int pos = 0;
        while (pos < srcline.length()) {
            char current = srcline.charAt(pos);
            if (alphabet.indexOf(current) == -1) {
                targetline.append(current);
                pos++;
            } else {
                int end = pos + 1;
                while (end < srcline.length() && alphabet.indexOf(srcline.charAt(end)) != -1) {
                    end++;
                }
                targetline.append(pyConvertString(pytypein, pytypeout, srcline.substring(pos, end)));
                pos = end;
            }
        }
        return targetline.toString();
    }

    /** Maps a command-line letter to its system index, or -1 if the letter is unknown. */
    private static int systemForFlag(char flag) {
        switch (flag) {
            case 'p': return PY;
            case 'w': return WG;
            case 'y': return YALE;
            case 'i': return GI;
            case 'g': return GR;
            case 'f': return FR;
            default: return -1;
        }
    }

    /**
     * Command-line entry point: {@code pyConvert -XY sourcefile}, where X and Y
     * are the source and target system letters. The file is read as Big5 and
     * each converted line is printed to standard output. Exits with status 1
     * on bad arguments or a failed conversion.
     *
     * @param argc command-line arguments
     */
    public static void main(String argc[]) {
        // Print help message if requested
        if (argc.length > 0 && (argc[0].equals("h") || argc[0].equals("-h"))) {
            System.out.println("Usage: pyConvert -[pwyigfh][pwyigfh] sourcefile");
            System.out.println("First letter is source romanization, second letter");
            System.out.println("is target romanization.");
            System.out.println(" p = Hanyu Pinyin, w = Wade Giles, y = Yale");
            System.out.println(" i = GuoinII, g = Gwoyeu Romatzyh, f = French");
            System.exit(0);
        }
        // Check for arguments
        if (argc.length < 2) {
            System.err.println("Please specify conversion direction and file name.");
            System.exit(1);
        }
        // Determine conversion direction: the letters sit at positions 1 and 2.
        String direction = argc[0].toLowerCase(Locale.ENGLISH);
        if (direction.length() < 3) {
            System.err.println("Conversion direction must look like -pw (source letter, target letter).");
            System.exit(1);
        }
        int[] pytypes = new int[2];
        for (int i = 0; i < 2; i++) {
            pytypes[i] = systemForFlag(direction.charAt(i + 1));
            if (pytypes[i] == -1) {
                System.err.println("Unknown romanization letter: " + direction.charAt(i + 1));
                System.exit(1);
            }
        }
        // Convert file, line by line
        pyConvert pyconverter = new pyConvert();
        FileInputStream srcfile = null;
        boolean failed = false;
        try {
            srcfile = new FileInputStream(argc[1]);
            BufferedReader in = new BufferedReader(new InputStreamReader(srcfile, "Big5"));
            String srcline;
            while ((srcline = in.readLine()) != null) {
                System.out.println(pyconverter.pyConvertLine(pytypes[0], pytypes[1], srcline));
            }
        } catch (Exception exc) {
            System.err.println(exc);
            failed = true;
        } finally {
            if (srcfile != null) {
                try {
                    srcfile.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close of an input file.
                }
            }
        }
        if (failed) {
            System.exit(1);
        }
    }
}