1. medoc
  2. uchardet-enhanced

Source

uchardet-enhanced / libcharsetdetect / mozilla / extensions / universalchardet / src / base / nsSBCSGroupProber.cpp

Diff from to

File libcharsetdetect/mozilla/extensions/universalchardet/src/base/nsSBCSGroupProber.cpp

  • Ignore whitespace
   mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
   mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
   mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
-  mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
-  mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
+  mProbers[6] = new nsSingleByteCharSetProber(&ISO_8859_7greekModel);
+  mProbers[7] = new nsSingleByteCharSetProber(&WINDOWS_1253greekModel);
   mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
   mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
   mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel);
     }
   }
 
-  // disable latin2 before latin1 is available, otherwise all latin1 
-  // will be detected as latin2 because of their similarity.
-  //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
-  //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
+  mProbers[14] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
+  mProbers[15] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
+  mProbers[16] = new nsSingleByteCharSetProber(&WINDOWS_1252frenchModel);
+  mProbers[17] = new nsSingleByteCharSetProber(&WINDOWS_1252germanModel);
+  mProbers[18] = new nsSingleByteCharSetProber(&WINDOWS_1252swedishModel);
+  mProbers[19] = new nsSingleByteCharSetProber(&ISO_8859_9turkishModel);
+  mProbers[20] = new nsSingleByteCharSetProber(&WINDOWS_1252finnishModel);
+  mProbers[21] = new nsSingleByteCharSetProber(&windows_1252spanishModel);
+  mProbers[22] = new nsSingleByteCharSetProber(&iso_8859_2czechModel);
+  mProbers[23] = new nsSingleByteCharSetProber(&iso_8859_2polishModel);
 
   Reset();
 }
   PRUint32 i;
   char *newBuf1 = 0;
   PRUint32 newLen1 = 0;
+  char *newBuf2 = 0;
+  PRUint32 newLen2 = 0;
 
-  //apply filter to original buffer, and we got new buffer back
-  //depend on what script it is, we will feed them the new buffer 
-  //we got after applying proper filter
-  //this is done without any consideration to KeepEnglishLetters
-  //of each prober since as of now, there are no probers here which
-  //recognize languages with English characters.
   if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
     goto done;
+  if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf2, newLen2))
+    goto done;
   
   if (newLen1 == 0)
     goto done; // Nothing to see here, move on.
   {
      if (!mIsActive[i])
        continue;
-     st = mProbers[i]->HandleData(newBuf1, newLen1);
+     if (mProbers[i]->KeepEnglishLetters()) {
+       st = mProbers[i]->HandleData(newBuf2, newLen2);
+     } 
+     else 
+     {
+       st = mProbers[i]->HandleData(newBuf1, newLen1);
+     }
      if (st == eFoundIt)
      {
        mBestGuess = i;
   for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
   {
     if (!mIsActive[i])
-      printf("  inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
+      printf("  inactive: [%s] [%s](i.e. confidence is too low).\r\n", 
+             mProbers[i]? mProbers[i]->GetCharSetName() : "UNSET",
+             mProbers[i]? mProbers[i]->GetLangName() : "UNSET"
+        );
     else
       mProbers[i]->DumpStatus();
   }
-  printf(" SBCS Group found best match [%s] confidence %f.\r\n",  
-         mProbers[mBestGuess]->GetCharSetName(), cf);
+  printf(" SBCS Group found best match [%s] [%s] confidence %f.\r\n",  
+         mProbers[mBestGuess]->GetCharSetName(), 
+         mProbers[mBestGuess]->GetLangName(), 
+         cf);
 }
 #endif