KGRKJGETMRETU895U-589TY5MIGM5JGB5SDFESFREWTGR54TY
Server : Apache/2.4.62
System : FreeBSD fbsdweb2.web.rcn.net 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
User : www ( 80)
PHP Version : 8.3.8
Disable Function : NONE
Directory :  /domains/mandarintools/download/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /domains/mandarintools/download/detecttest.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "SinoDetect.h"


/* Simple test program to use with SinoDetect.
   Reads in text from a file (either line by line or in large chunks)
   and runs the detection code on the text.
   Prints out the detector's guess for the file's encoding.
*/


/* Program entry point.
 *
 * Usage: <program> <file>
 *
 * Reads the first chunk (up to 19998 bytes) of the file named by argv[1],
 * hands it to SinoDetect::detect_encoding, and prints the name of the
 * encoding the detector reports.
 *
 * Returns 0 on success (including an empty file, for which nothing is
 * printed). Exits with status 1 when no file name is given or the file
 * cannot be opened; both error messages go to stderr.
 */
int main(int argc, char** argv) {
  FILE *srcfile;
  /* 20000 bytes: 19998 of data plus two spare bytes for the terminator. */
  char rawtext[20000];
  size_t bytes_read;
  int encoding_guess;
  SinoDetect sinodetector;

  if (argc < 2) {
    fprintf(stderr, "Need a file name.\n");
    exit(1);
  }

  /* Binary mode: the raw bytes are what is being analysed, so no newline or
     end-of-file translation may be applied on platforms that distinguish
     text from binary streams. */
  srcfile = fopen(argv[1], "rb");
  if (srcfile == NULL) {
    fprintf(stderr, "Error opening file: %s\n", argv[1]);
    exit(1);
  }

  /* Only the first chunk of the file is examined. */
  bytes_read = fread(rawtext, sizeof(char), sizeof(rawtext) - 2, srcfile);
  fclose(srcfile);

  if (bytes_read > 0) {
    /* fread does not terminate the buffer, and detect_encoding is given only
       a pointer (no length), so terminate the data explicitly. Two zero
       bytes are written so that a 16-bit terminator is present as well.
       NOTE(review): assumes the detector stops at a NUL terminator --
       confirm against SinoDetect.h. */
    rawtext[bytes_read] = '\0';
    rawtext[bytes_read + 1] = '\0';

    encoding_guess = sinodetector.detect_encoding((unsigned char*) rawtext);

    // Pretty print the encoding guess; unrecognised values print nothing.
    if (encoding_guess == SinoDetect::GB2312) {
      printf("Encoding is GB-2312\n");
    } else if (encoding_guess == SinoDetect::GBK) {
      printf("Encoding is GBK\n");
    } else if (encoding_guess == SinoDetect::HZ) {
      printf("Encoding is HZ\n");
    } else if (encoding_guess == SinoDetect::BIG5) {
      printf("Encoding is Big5\n");
    } else if (encoding_guess == SinoDetect::BIG5PLUS) {
      printf("Encoding is Big5+\n");
    } else if (encoding_guess == SinoDetect::EUC_TW) {
      printf("Encoding is EUC-TW (CNS 11643)\n");
    } else if (encoding_guess == SinoDetect::ISO_2022_CN) {
      printf("Encoding is ISO 2022-CN\n");
    } else if (encoding_guess == SinoDetect::UTF8) {
      printf("Encoding is UTF-8\n");
    } else if (encoding_guess == SinoDetect::UTF16) {
      printf("Encoding is UTF-16\n");
    } else if (encoding_guess == SinoDetect::EUC_KR) {
      printf("Encoding is EUC-KR\n");
    } else if (encoding_guess == SinoDetect::CP949) {
      printf("Encoding is CP949\n");
    } else if (encoding_guess == SinoDetect::ISO_2022_KR) {
      printf("Encoding is ISO 2022-KR\n");
    } else if (encoding_guess == SinoDetect::SJIS) {
      printf("Encoding is SJIS\n");
    } else if (encoding_guess == SinoDetect::EUC_JP) {
      printf("Encoding is EUC-JP\n");
    } else if (encoding_guess == SinoDetect::ISO_2022_JP) {
      printf("Encoding is ISO 2022-JP\n");
    } else if (encoding_guess == SinoDetect::ASCII) {
      printf("Encoding is ASCII\n");
    } else if (encoding_guess == SinoDetect::OTHER) {
      printf("Encoding is OTHER\n");
    }
  }

  return 0;
}


Anon7 - 2021