|
Server : Apache/2.4.62 System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64 User : www ( 80) PHP Version : 8.3.8 Disable Function : NONE Directory : /domains/mandarintools/download/ |
Upload File : |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "SinoDetect.h"
/* Simple test program to use with SinoDetect.
Reads in text from a file (either line by line or in large chunks)
and runs the detection code on the text.
Prints out the detector's guess for the file's encoding.
*/
main(int argc, char** argv) {
FILE *srcfile;
char rawtext[20000];
int encoding_guess;
SinoDetect sinodetector;
if (argc < 2) {
fprintf(stderr, "Need a file name.\n");
exit(1);
}
if ((srcfile = fopen(argv[1], "r")) != NULL) {
// while (fgets(rawtext, 19998, srcfile) > 0) { // Line by line
if (fread(rawtext, sizeof(char), 19998, srcfile) > 0) { // Chunk by Chunk
encoding_guess = sinodetector.detect_encoding((unsigned char*) rawtext);
// Pretty print the encoding guess
if (encoding_guess == SinoDetect::GB2312) {
printf("Encoding is GB-2312\n");
} else if (encoding_guess == SinoDetect::GBK) {
printf("Encoding is GBK\n");
} else if (encoding_guess == SinoDetect::HZ) {
printf("Encoding is HZ\n");
} else if (encoding_guess == SinoDetect::BIG5) {
printf("Encoding is Big5\n");
} else if (encoding_guess == SinoDetect::BIG5PLUS) {
printf("Encoding is Big5+\n");
} else if (encoding_guess == SinoDetect::EUC_TW) {
printf("Encoding is EUC-TW (CNS 11643)\n");
} else if (encoding_guess == SinoDetect::ISO_2022_CN) {
printf("Encoding is ISO 2022-CN\n");
} else if (encoding_guess == SinoDetect::UTF8) {
printf("Encoding is UTF-8\n");
} else if (encoding_guess == SinoDetect::UTF16) {
printf("Encoding is UTF-16\n");
} else if (encoding_guess == SinoDetect::EUC_KR) {
printf("Encoding is EUC-KR\n");
} else if (encoding_guess == SinoDetect::CP949) {
printf("Encoding is CP949\n");
} else if (encoding_guess == SinoDetect::ISO_2022_KR) {
printf("Encoding is ISO 2022-KR\n");
} else if (encoding_guess == SinoDetect::SJIS) {
printf("Encoding is SJIS\n");
} else if (encoding_guess == SinoDetect::EUC_JP) {
printf("Encoding is EUC-JP\n");
} else if (encoding_guess == SinoDetect::ISO_2022_JP) {
printf("Encoding is ISO 2022-JP\n");
} else if (encoding_guess == SinoDetect::ASCII) {
printf("Encoding is ASCII\n");
} else if (encoding_guess == SinoDetect::OTHER) {
printf("Encoding is OTHER\n");
}
};
fclose(srcfile);
} else {
printf("Error opening file: %s\n", argv[1]);
exit(1);
}
return 0;
}