Commits

Anonymous committed 4248fb0

reconcile w/p4

Comments (0)

Files changed (362)

-include $(call all-subdir-makefiles)
+# Builds the PinyinIME application package, then includes the makefiles found
+# in the jni/ and lib/ sub-directories.
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_PACKAGE_NAME := PinyinIME
+LOCAL_CERTIFICATE := shared
+
+LOCAL_SRC_FILES := $(call all-subdir-java-files)
+
+LOCAL_JNI_SHARED_LIBRARIES := libjni_pinyinime
+LOCAL_STATIC_JAVA_LIBRARIES := com.android.inputmethod.pinyin.lib
+
+# Keep *.dat entries uncompressed so that the dictionary file can be read
+# with a raw file descriptor.
+LOCAL_AAPT_FLAGS := -0 .dat
+
+include $(BUILD_PACKAGE)
+
+# Save our own directory: an included makefile may reassign LOCAL_PATH
+# (jni/Android.mk does), so the includes below must not depend on it.
+MY_PATH := $(LOCAL_PATH)
+
+include $(MY_PATH)/jni/Android.mk
+include $(MY_PATH)/lib/Android.mk

AndroidManifest.xml

+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (C) 2009 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+          package="com.android.inputmethod.pinyin"
+          android:sharedUserId="android.uid.shared">
+
+    <uses-permission android:name="android.permission.VIBRATE"/>
+
+    <application android:icon="@drawable/app_icon"
+                 android:label="@string/ime_name">
+
+        <!-- Decoder service: exported, reachable through the Decoder_Service action. -->
+        <service android:name=".PinyinDecoderService"
+                 android:exported="true">
+            <intent-filter>
+                <action android:name="com.android.inputmethod.pinyin.Decoder_Service" />
+                <category android:name="android.intent.category.DEFAULT" />
+            </intent-filter>
+        </service>
+
+        <!-- Input method service: binding requires BIND_INPUT_METHOD. -->
+        <service android:name=".PinyinIME"
+                 android:label="@string/ime_name"
+                 android:permission="android.permission.BIND_INPUT_METHOD">
+            <intent-filter>
+                <action android:name="android.view.InputMethod" />
+            </intent-filter>
+            <meta-data android:name="android.view.im" android:resource="@xml/method" />
+        </service>
+
+        <!-- Settings screen of the input method. -->
+        <activity android:name=".SettingsActivity"
+                  android:label="@string/ime_settings_activity_name">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN"/>
+            </intent-filter>
+        </activity>
+
+    </application>
+</manifest>

PinyinIME/Android.mk

-# Builds the PinyinIME application package, then includes the makefiles found
-# in the jni/ and lib/ sub-directories.
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_PACKAGE_NAME := PinyinIME
-LOCAL_CERTIFICATE := shared
-
-LOCAL_SRC_FILES := $(call all-subdir-java-files)
-
-LOCAL_JNI_SHARED_LIBRARIES := libjni_pinyinime
-LOCAL_STATIC_JAVA_LIBRARIES := com.android.inputmethod.pinyin.lib
-
-# Keep *.dat entries uncompressed so that the dictionary file can be read
-# with a raw file descriptor.
-LOCAL_AAPT_FLAGS := -0 .dat
-
-include $(BUILD_PACKAGE)
-
-# Save our own directory: an included makefile may reassign LOCAL_PATH
-# (jni/Android.mk does), so the includes below must not depend on it.
-MY_PATH := $(LOCAL_PATH)
-
-include $(MY_PATH)/jni/Android.mk
-include $(MY_PATH)/lib/Android.mk

PinyinIME/AndroidManifest.xml

-<?xml version="1.0" encoding="UTF-8"?>
-<!-- Copyright (C) 2009 The Android Open Source Project
-
-     Licensed under the Apache License, Version 2.0 (the "License");
-     you may not use this file except in compliance with the License.
-     You may obtain a copy of the License at
-
-          http://www.apache.org/licenses/LICENSE-2.0
-
-     Unless required by applicable law or agreed to in writing, software
-     distributed under the License is distributed on an "AS IS" BASIS,
-     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     See the License for the specific language governing permissions and
-     limitations under the License.
--->
-<manifest xmlns:android="http://schemas.android.com/apk/res/android"
-          package="com.android.inputmethod.pinyin"
-          android:sharedUserId="android.uid.shared">
-
-    <uses-permission android:name="android.permission.VIBRATE"/>
-
-    <application android:icon="@drawable/app_icon"
-                 android:label="@string/ime_name">
-
-        <!-- Decoder service: exported, reachable through the Decoder_Service action. -->
-        <service android:name=".PinyinDecoderService"
-                 android:exported="true">
-            <intent-filter>
-                <action android:name="com.android.inputmethod.pinyin.Decoder_Service" />
-                <category android:name="android.intent.category.DEFAULT" />
-            </intent-filter>
-        </service>
-
-        <!-- Input method service: binding requires BIND_INPUT_METHOD. -->
-        <service android:name=".PinyinIME"
-                 android:label="@string/ime_name"
-                 android:permission="android.permission.BIND_INPUT_METHOD">
-            <intent-filter>
-                <action android:name="android.view.InputMethod" />
-            </intent-filter>
-            <meta-data android:name="android.view.im" android:resource="@xml/method" />
-        </service>
-
-        <!-- Settings screen of the input method. -->
-        <activity android:name=".SettingsActivity"
-                  android:label="@string/ime_settings_activity_name">
-            <intent-filter>
-                <action android:name="android.intent.action.MAIN"/>
-            </intent-filter>
-        </activity>
-
-    </application>
-</manifest>

PinyinIME/jni/Android.mk

-# Builds libjni_pinyinime, the shared library that contains the JNI glue
-# (android/) together with the sources under share/.
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := libjni_pinyinime
-LOCAL_PRELINK_MODULE := false
-
-# JNI entry points.
-LOCAL_SRC_FILES := \
-    android/com_android_inputmethod_pinyin_PinyinDecoderService.cpp
-
-# Sources shared with the command-line dictionary builder.
-LOCAL_SRC_FILES += \
-    share/dictbuilder.cpp \
-    share/dictlist.cpp \
-    share/dicttrie.cpp \
-    share/lpicache.cpp \
-    share/matrixsearch.cpp \
-    share/mystdlib.cpp \
-    share/ngram.cpp \
-    share/pinyinime.cpp \
-    share/searchutility.cpp \
-    share/spellingtable.cpp \
-    share/spellingtrie.cpp \
-    share/splparser.cpp \
-    share/userdict.cpp \
-    share/utf16char.cpp \
-    share/utf16reader.cpp \
-    share/sync.cpp
-
-LOCAL_C_INCLUDES += $(JNI_H_INCLUDE)
-
-LOCAL_SHARED_LIBRARIES := libcutils libutils
-LOCAL_LDLIBS += -lpthread
-
-include $(BUILD_SHARED_LIBRARY)

PinyinIME/jni/android/com_android_inputmethod_pinyin_PinyinDecoderService.cpp

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <assert.h>
-#include <cutils/log.h>
-#include <jni.h>
-#include <string.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "../include/pinyinime.h"
-#include "../include/sync.h"
-#include "../include/userdict.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-using namespace ime_pinyin;
-
-#define RET_BUF_LEN 256
-
-static char16 retbuf[RET_BUF_LEN];
-static char16 (*predict_buf)[kMaxPredictSize + 1] = NULL;
-static size_t predict_len;
-
-static Sync sync_worker;
-
-static struct file_descriptor_offsets_t
-{
-  jclass mClass;
-  jfieldID mDescriptor;
-} gFileDescriptorOffsets;
-
-/**
- * Opens the decoder from a system dictionary file name and a user dictionary
- * file name.
- *
- * The contents of both byte arrays are handed to im_open_decoder() as C
- * strings, so the caller must supply NUL-terminated names.
- *
- * @param fn_sys_dict Byte array holding the system dictionary file name.
- * @param fn_usr_dict Byte array holding the user dictionary file name.
- * @return JNI_TRUE if im_open_decoder() reports success, JNI_FALSE otherwise.
- */
-JNIEXPORT jboolean JNICALL nativeImOpenDecoder(JNIEnv* env, jclass jclazz,
-                                               jbyteArray fn_sys_dict,
-                                               jbyteArray fn_usr_dict) {
-  jbyte *fsd = (*env).GetByteArrayElements(fn_sys_dict, 0);
-  jbyte *fud = (*env).GetByteArrayElements(fn_usr_dict, 0);
-
-  jboolean opened = JNI_FALSE;
-  if (im_open_decoder((const char*)fsd, (const char*)fud))
-    opened = JNI_TRUE;
-
-  // Every Get<Type>ArrayElements must be paired with a release. The buffers
-  // were only read, so JNI_ABORT frees them without copying anything back.
-  // NOTE(review): assumes im_open_decoder() does not keep these pointers
-  // after it returns -- confirm.
-  if (NULL != fud)
-    (*env).ReleaseByteArrayElements(fn_usr_dict, fud, JNI_ABORT);
-  if (NULL != fsd)
-    (*env).ReleaseByteArrayElements(fn_sys_dict, fsd, JNI_ABORT);
-
-  return opened;
-}
-
-/**
- * Opens the decoder with the system dictionary read from a file descriptor.
- *
- * @param fd_sys_dict java.io.FileDescriptor whose int "descriptor" field is
- *                    read through the cached field id.
- * @param startoffset Passed unchanged to im_open_decoder_fd().
- * @param length      Passed unchanged to im_open_decoder_fd().
- * @param fn_usr_dict Byte array holding the user dictionary file name; its
- *                    contents are used as a C string.
- * @return JNI_TRUE if im_open_decoder_fd() reports success, JNI_FALSE
- *         otherwise (including when the descriptor cannot be duplicated).
- */
-JNIEXPORT jboolean JNICALL nativeImOpenDecoderFd(JNIEnv* env, jclass jclazz,
-                                                 jobject fd_sys_dict,
-                                                 jlong startoffset,
-                                                 jlong length,
-                                                 jbyteArray fn_usr_dict) {
-  jint fd = env->GetIntField(fd_sys_dict, gFileDescriptorOffsets.mDescriptor);
-
-  // Work on a duplicate so that only our own copy is closed below.
-  int newfd = dup(fd);
-  if (newfd < 0)
-    return JNI_FALSE;
-
-  jbyte *fud = (*env).GetByteArrayElements(fn_usr_dict, 0);
-
-  jboolean opened = JNI_FALSE;
-  if (im_open_decoder_fd(newfd, startoffset, length, (const char*)fud))
-    opened = JNI_TRUE;
-
-  close(newfd);
-
-  // The name buffer was only read; release it without copying back.
-  // NOTE(review): assumes im_open_decoder_fd() does not keep the pointer
-  // after it returns -- confirm.
-  if (NULL != fud)
-    (*env).ReleaseByteArrayElements(fn_usr_dict, fud, JNI_ABORT);
-
-  return opened;
-}
-
-/**
- * Forwards both length limits to im_set_max_lens().
- *
- * @param max_sps_len Converted to size_t and passed as the first argument.
- * @param max_hzs_len Converted to size_t and passed as the second argument.
- */
-JNIEXPORT void JNICALL nativeImSetMaxLens(JNIEnv* env, jclass jclazz,
-                                          jint max_sps_len,
-                                          jint max_hzs_len) {
-  const size_t sps_limit = static_cast<size_t>(max_sps_len);
-  const size_t hzs_limit = static_cast<size_t>(max_hzs_len);
-
-  im_set_max_lens(sps_limit, hzs_limit);
-}
-
-/**
- * Closes the decoder by calling im_close_decoder().
- *
- * @return Always JNI_TRUE; no result of im_close_decoder() is consulted.
- */
-JNIEXPORT jboolean JNICALL nativeImCloseDecoder(JNIEnv* env, jclass jclazz) {
-  const jboolean always_ok = JNI_TRUE;
-
-  im_close_decoder();
-  return always_ok;
-}
-
-/**
- * Runs im_search() on the bytes of pybuf.
- *
- * @param pybuf Byte array holding the input passed to im_search().
- * @param pylen Length passed to im_search() along with the buffer.
- * @return The value returned by im_search(), or 0 if the array contents
- *         cannot be obtained.
- */
-JNIEXPORT jint JNICALL nativeImSearch(JNIEnv* env, jclass jclazz,
-                                      jbyteArray pybuf, jint pylen) {
-  jbyte *array_body = (*env).GetByteArrayElements(pybuf, 0);
-
-  if (NULL == array_body)
-    return 0;
-
-  jint result = im_search((const char*)array_body, pylen);
-
-  // The buffer was only read; release it without copying back so it is not
-  // leaked on every search.
-  (*env).ReleaseByteArrayElements(pybuf, array_body, JNI_ABORT);
-
-  return result;
-}
-
-/**
- * Forwards the three arguments unchanged to im_delsearch().
- *
- * @return The value returned by im_delsearch().
- */
-JNIEXPORT jint JNICALL nativeImDelSearch(JNIEnv* env, jclass jclazz, jint pos,
-                                         jboolean is_pos_in_splid,
-                                         jboolean clear_fixed_this_step) {
-  const jint result =
-      im_delsearch(pos, is_pos_in_splid, clear_fixed_this_step);
-  return result;
-}
-
-/** Resets the current search by calling im_reset_search(). */
-JNIEXPORT void JNICALL nativeImResetSearch(JNIEnv* env, jclass jclazz) {
-  im_reset_search();
-}
-
-/**
- * Passes one byte to im_add_letter().
- *
- * @return The value returned by im_add_letter().
- */
-JNIEXPORT jint JNICALL nativeImAddLetter(JNIEnv *env, jclass clazz, jbyte ch) {
-  const jint result = im_add_letter(ch);
-  return result;
-}
-
-/**
- * Returns the string obtained from im_get_sps_str() as a Java string.
- *
- * @param decoded If true, only the length reported by im_get_sps_str() is
- *                returned; otherwise the whole NUL-terminated string is.
- * @return A new Java string. At most RET_BUF_LEN - 1 characters are
- *         returned because the text is staged in the static retbuf.
- */
-JNIEXPORT jstring JNICALL nativeImGetPyStr(JNIEnv* env, jclass jclazz,
-                                           jboolean decoded) {
-  size_t py_len;
-  const char *py = im_get_sps_str(&py_len);  // py_len gets decoded length
-  assert(NULL != py);
-  if (!decoded)
-    py_len = strlen(py);
-
-  // Never write past the end of retbuf (one slot is kept for the terminator).
-  if (py_len > RET_BUF_LEN - 1)
-    py_len = RET_BUF_LEN - 1;
-
-  // The result is not used here. The call is kept in case it has side
-  // effects on the decoder state -- TODO confirm and drop it if it has none.
-  const unsigned short *spl_start;
-  im_get_spl_start_pos(spl_start);
-
-  // Widen each byte to a 16-bit character.
-  size_t i;
-  for (i = 0; i < py_len; i++)
-    retbuf[i] = py[i];
-  retbuf[i] = (char16)'\0';
-
-  jstring retstr = (*env).NewString((unsigned short*)retbuf, i);
-  return retstr;
-}
-
-/**
- * Returns a length for the string obtained from im_get_sps_str().
- *
- * @param decoded If true, the length reported by im_get_sps_str() is
- *                returned; otherwise strlen() of the whole string is.
- */
-JNIEXPORT jint JNICALL nativeImGetPyStrLen(JNIEnv* env, jclass jclazz,
-                                           jboolean decoded) {
-  size_t decoded_len;
-  const char *py = im_get_sps_str(&decoded_len);
-  assert(NULL != py);
-
-  if (decoded)
-    return decoded_len;
-  return strlen(py);
-}
-
-/**
- * Returns the buffer obtained from im_get_spl_start_pos() as an int array.
- *
- * Layout of the returned array (len + 2 elements): element 0 holds len, and
- * elements 1 .. len + 1 hold spl_start[0 .. len].
- *
- * @return The new array, or NULL if it cannot be allocated or accessed (a
- *         Java exception is then pending).
- */
-JNIEXPORT jintArray JNICALL nativeImGetSplStart(JNIEnv* env, jclass jclazz) {
-  const unsigned short *spl_start;
-  size_t len;
-
-  // There will be len + 1 elements in the buffer when len > 0.
-  len = im_get_spl_start_pos(spl_start);
-
-  jintArray arr = (*env).NewIntArray(len + 2);
-  if (NULL == arr)
-    return NULL;
-
-  jint *arr_body = (*env).GetIntArrayElements(arr, 0);
-  if (NULL == arr_body)
-    return NULL;
-
-  arr_body[0] = len; // element 0 is used to store the length of buffer.
-  for (size_t i = 0; i <= len; i++)
-    arr_body[i + 1] = spl_start[i];
-
-  // arr_body may be a copy of the array contents. Mode 0 copies the values
-  // back into arr and frees the buffer; without this call the writes above
-  // are not guaranteed to reach the Java array.
-  (*env).ReleaseIntArrayElements(arr, arr_body, 0);
-  return arr;
-}
-
-/**
- * Returns the candidate with the given id as a Java string.
- *
- * The candidate is fetched into the static retbuf through
- * im_get_candidate(); if that call fails an empty string is returned.
- */
-JNIEXPORT jstring JNICALL nativeImGetChoice(JNIEnv *env, jclass clazz,
-                                            jint candidateId) {
-  if (!im_get_candidate(candidateId, retbuf, RET_BUF_LEN))
-    return (*env).NewString((unsigned short*)retbuf, 0);
-
-  return (*env).NewString(retbuf, utf16_strlen(retbuf));
-}
-
-/**
- * Passes choice_id to im_choose().
- *
- * @return The value returned by im_choose().
- */
-JNIEXPORT jint JNICALL nativeImChoose(JNIEnv *env, jclass clazz,
-                                      jint choice_id) {
-  const jint result = im_choose(choice_id);
-  return result;
-}
-
-/**
- * Calls im_cancel_last_choice().
- *
- * @return The value returned by im_cancel_last_choice().
- */
-JNIEXPORT jint JNICALL nativeImCancelLastChoice(JNIEnv *env, jclass clazz) {
-  const jint result = im_cancel_last_choice();
-  return result;
-}
-
-/**
- * Calls im_get_fixed_len().
- *
- * @return The value returned by im_get_fixed_len().
- */
-JNIEXPORT jint JNICALL nativeImGetFixedLen(JNIEnv *env, jclass clazz) {
-  const jint fixed_len = im_get_fixed_len();
-  return fixed_len;
-}
-
-/**
- * Calls im_cancel_input() and maps its result to a jboolean.
- *
- * @return JNI_TRUE if im_cancel_input() returns a true value, else JNI_FALSE.
- */
-JNIEXPORT jboolean JNICALL nativeImCancelInput(JNIEnv *env, jclass clazz) {
-  return im_cancel_input() ? JNI_TRUE : JNI_FALSE;
-}
-
-/**
- * Calls im_flush_cache().
- *
- * @return Always JNI_TRUE; no result of im_flush_cache() is consulted.
- */
-JNIEXPORT jboolean JNICALL nativeImFlushCache(JNIEnv *env, jclass clazz) {
-  const jboolean always_ok = JNI_TRUE;
-
-  im_flush_cache();
-  return always_ok;
-}
-
-/**
- * Computes predictions for the given history string.
- *
- * Only the last kMaxPredictSize characters of fixed_str are used. The
- * results stay in the static predict_buf / predict_len and are fetched one
- * at a time with nativeImGetPredictItem().
- *
- * @param fixed_str The history string.
- * @return The number of predictions reported by im_get_predicts(), or 0 if
- *         the characters of fixed_str cannot be obtained.
- */
-JNIEXPORT jint JNICALL nativeImGetPredictsNum(JNIEnv *env, jclass clazz,
-                                              jstring fixed_str) {
-  // Keep the pointer returned by GetStringChars untouched: it is needed for
-  // the release below, while fixed_ptr may be advanced.
-  const jchar *chars = (*env).GetStringChars(fixed_str, NULL);
-  if (NULL == chars) {
-    predict_len = 0;
-    return 0;
-  }
-  char16 *fixed_ptr = (char16*)chars;
-  size_t fixed_len = (size_t)(*env).GetStringLength(fixed_str);
-
-  char16 fixed_buf[kMaxPredictSize + 1];
-
-  if (fixed_len > kMaxPredictSize) {
-    fixed_ptr += fixed_len - kMaxPredictSize;
-    fixed_len = kMaxPredictSize;
-  }
-  utf16_strncpy(fixed_buf, fixed_ptr, fixed_len);
-  fixed_buf[fixed_len] = (char16)'\0';
-
-  // The characters have been copied into fixed_buf, so the JNI buffer can be
-  // released before decoding.
-  (*env).ReleaseStringChars(fixed_str, chars);
-
-  predict_len = im_get_predicts(fixed_buf, predict_buf);
-
-  return predict_len;
-}
-
-/**
- * Returns one prediction produced by the last nativeImGetPredictsNum() call.
- *
- * @param predict_no Index of the prediction, valid in [0, predict_len).
- * @return The prediction as a Java string, or an empty string if predict_no
- *         is out of range or no prediction buffer is available.
- */
-JNIEXPORT jstring JNICALL nativeImGetPredictItem(JNIEnv *env, jclass clazz,
-                                                 jint predict_no) {
-  // predict_buf starts out NULL, so it is not indexed unless the request is
-  // known to be in range.
-  if (NULL == predict_buf || predict_no < 0 ||
-      (size_t)predict_no >= predict_len) {
-    const unsigned short empty = 0;
-    return (*env).NewString(&empty, 0);
-  }
-
-  return (*env).NewString((unsigned short*)predict_buf[predict_no],
-                          utf16_strlen(predict_buf[predict_no]));
-}
-
-/**
- * Starts a sync session by calling sync_worker.begin().
- *
- * @param dict_file Byte array whose contents are passed to begin() as a C
- *                  string, so the caller must supply a NUL-terminated name.
- * @return JNI_TRUE if begin() returns true, JNI_FALSE otherwise.
- */
-JNIEXPORT jboolean JNICALL nativeSyncBegin(JNIEnv *env, jclass clazz,
-                                           jbyteArray dict_file) {
-  jbyte *file_name = (*env).GetByteArrayElements(dict_file, 0);
-
-  jboolean started = JNI_FALSE;
-  if (true == sync_worker.begin((const char *)file_name))
-    started = JNI_TRUE;
-
-  // The name buffer was only read; release it without copying back.
-  // NOTE(review): assumes Sync::begin() does not keep the pointer after it
-  // returns -- confirm.
-  if (NULL != file_name)
-    (*env).ReleaseByteArrayElements(dict_file, file_name, JNI_ABORT);
-
-  return started;
-}
-
-/**
- * Calls sync_worker.finish().
- *
- * @return Always JNI_TRUE.
- */
-JNIEXPORT jboolean JNICALL nativeSyncFinish(JNIEnv *env, jclass clazz) {
-  const jboolean always_ok = JNI_TRUE;
-
-  sync_worker.finish();
-  return always_ok;
-}
-
-/**
- * Calls sync_worker.get_capacity().
- *
- * @return The value returned by get_capacity().
- */
-JNIEXPORT jint JNICALL nativeSyncGetCapacity(JNIEnv *env, jclass clazz) {
-  const jint capacity = sync_worker.get_capacity();
-  return capacity;
-}
-
-/**
- * Passes the characters of tomerge to sync_worker.put_lemmas().
- *
- * @param tomerge String whose UTF-16 characters and length are handed over.
- * @return The value returned by put_lemmas().
- */
-JNIEXPORT jint JNICALL nativeSyncPutLemmas(JNIEnv *env, jclass clazz,
-                                           jstring tomerge) {
-  char16 *chars = (char16*)(*env).GetStringChars(tomerge, NULL);
-  int char_count = (size_t)(*env).GetStringLength(tomerge);
-
-  int merged = sync_worker.put_lemmas(chars, char_count);
-
-  // Done with the characters; hand them back to the VM.
-  (*env).ReleaseStringChars(tomerge, chars);
-  return merged;
-}
-
-/**
- * Fetches lemmas from sync_worker into the static retbuf.
- *
- * @return A new Java string holding the fetched characters, or NULL when
- *         get_lemmas() reports a length of 0.
- */
-JNIEXPORT jstring JNICALL nativeSyncGetLemmas(JNIEnv *env, jclass clazz) {
-  int fetched = sync_worker.get_lemmas(retbuf, RET_BUF_LEN);
-
-  if (fetched != 0)
-    return (*env).NewString((unsigned short*)retbuf, fetched);
-
-  return NULL;
-}
-
-/**
- * Calls sync_worker.get_last_got_count().
- *
- * @return The value returned by get_last_got_count().
- */
-JNIEXPORT jint JNICALL nativeSyncGetLastCount(JNIEnv *env, jclass clazz) {
-  const jint last_count = sync_worker.get_last_got_count();
-  return last_count;
-}
-
-/**
- * Calls sync_worker.get_total_count().
- *
- * @return The value returned by get_total_count().
- */
-JNIEXPORT jint JNICALL nativeSyncGetTotalCount(JNIEnv *env, jclass clazz) {
-  const jint total_count = sync_worker.get_total_count();
-  return total_count;
-}
-
-/**
- * Calls sync_worker.clear_last_got().
- *
- * @return Always JNI_TRUE.
- */
-JNIEXPORT jboolean JNICALL nativeSyncClearLastGot(JNIEnv *env, jclass clazz) {
-  const jboolean always_ok = JNI_TRUE;
-
-  sync_worker.clear_last_got();
-  return always_ok;
-}
-
-/**
- * Native method table registered on PinyinDecoderService.
- *
- * Each entry is { Java method name, JNI signature, native function }.
- */
-static JNINativeMethod gMethods[] = {
-    /* ---- Pinyin-to-hanzi decoding ---- */
-    { "nativeImOpenDecoder", "([B[B)Z", (void*) nativeImOpenDecoder },
-    { "nativeImOpenDecoderFd", "(Ljava/io/FileDescriptor;JJ[B)Z", (void*) nativeImOpenDecoderFd },
-    { "nativeImSetMaxLens", "(II)V", (void*) nativeImSetMaxLens },
-    { "nativeImCloseDecoder", "()Z", (void*) nativeImCloseDecoder },
-    { "nativeImSearch",  "([BI)I", (void*) nativeImSearch },
-    { "nativeImDelSearch",  "(IZZ)I", (void*) nativeImDelSearch },
-    { "nativeImResetSearch",  "()V", (void*) nativeImResetSearch },
-    { "nativeImAddLetter", "(B)I", (void*) nativeImAddLetter },
-    { "nativeImGetPyStr", "(Z)Ljava/lang/String;", (void*) nativeImGetPyStr },
-    { "nativeImGetPyStrLen", "(Z)I", (void*) nativeImGetPyStrLen },
-    { "nativeImGetSplStart", "()[I", (void*) nativeImGetSplStart },
-    { "nativeImGetChoice", "(I)Ljava/lang/String;", (void*) nativeImGetChoice },
-    { "nativeImChoose", "(I)I", (void*) nativeImChoose },
-    { "nativeImCancelLastChoice", "()I", (void*) nativeImCancelLastChoice },
-    { "nativeImGetFixedLen", "()I", (void*) nativeImGetFixedLen },
-    { "nativeImGetPredictsNum", "(Ljava/lang/String;)I", (void*) nativeImGetPredictsNum },
-    { "nativeImGetPredictItem", "(I)Ljava/lang/String;", (void*) nativeImGetPredictItem },
-    { "nativeImCancelInput", "()Z", (void*) nativeImCancelInput },
-    { "nativeImFlushCache", "()Z", (void*) nativeImFlushCache },
-
-    /* ---- Sync ---- */
-    { "nativeSyncBegin", "([B)Z", (void*) nativeSyncBegin },
-    { "nativeSyncFinish", "()Z", (void*) nativeSyncFinish },
-    { "nativeSyncPutLemmas", "(Ljava/lang/String;)I", (void*) nativeSyncPutLemmas },
-    { "nativeSyncGetLemmas", "()Ljava/lang/String;", (void*) nativeSyncGetLemmas },
-    { "nativeSyncGetLastCount", "()I", (void*) nativeSyncGetLastCount },
-    { "nativeSyncGetTotalCount", "()I", (void*) nativeSyncGetTotalCount },
-    { "nativeSyncClearLastGot", "()Z", (void*) nativeSyncClearLastGot },
-    { "nativeSyncGetCapacity", "()I", (void*) nativeSyncGetCapacity },
-};
-
-
-/*
- * Register several native methods for one class, then cache the class and
- * the "descriptor" field id of java.io.FileDescriptor in
- * gFileDescriptorOffsets (read later by nativeImOpenDecoderFd).
- *
- * Returns JNI_TRUE on success, JNI_FALSE if any lookup or the registration
- * fails.
- */
-static int registerNativeMethods(JNIEnv* env, const char* className,
-    JNINativeMethod* gMethods, int numMethods)
-{
-    jclass clazz;
-
-    clazz = (*env).FindClass(className);
-    if (clazz == NULL) {
-        return JNI_FALSE;
-    }
-    if ((*env).RegisterNatives(clazz, gMethods, numMethods) < 0) {
-        return JNI_FALSE;
-    }
-
-    // LOG_FATAL_IF may be compiled out in some build configurations (see
-    // cutils/log.h), so each failure is also handled explicitly instead of
-    // continuing with a NULL class or field id.
-    clazz = env->FindClass("java/io/FileDescriptor");
-    LOG_FATAL_IF(clazz == NULL, "Unable to find Java class java.io.FileDescriptor");
-    if (clazz == NULL) {
-        return JNI_FALSE;
-    }
-    gFileDescriptorOffsets.mClass = (jclass) env->NewGlobalRef(clazz);
-    gFileDescriptorOffsets.mDescriptor = env->GetFieldID(clazz, "descriptor", "I");
-    LOG_FATAL_IF(gFileDescriptorOffsets.mDescriptor == NULL,
-                 "Unable to find descriptor field in java.io.FileDescriptor");
-    if (gFileDescriptorOffsets.mDescriptor == NULL) {
-        return JNI_FALSE;
-    }
-
-    return JNI_TRUE;
-}
-
-/*
- * Register the methods of gMethods on PinyinDecoderService.
- *
- * Returns JNI_TRUE on success, JNI_FALSE otherwise.
- */
-static int registerNatives(JNIEnv* env)
-{
-    const char* const kServiceClass =
-            "com/android/inputmethod/pinyin/PinyinDecoderService";
-    const int methodCount = sizeof(gMethods) / sizeof(gMethods[0]);
-
-    if (registerNativeMethods(env, kServiceClass, gMethods, methodCount))
-        return JNI_TRUE;
-
-    return JNI_FALSE;
-}
-
-/*
- * Library entry point: obtains the JNIEnv and registers the native methods.
- *
- * Returns the JNI version on success, -1 on failure.
- */
-JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved)
-{
-    JNIEnv* env = NULL;
-
-    if ((*vm).GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) {
-        return -1;
-    }
-    assert(env != NULL);
-
-    if (!registerNatives(env)) {
-        return -1;
-    }
-
-    /* success -- return valid version number */
-    return JNI_VERSION_1_4;
-}
-
-#ifdef __cplusplus
-}
-#endif

PinyinIME/jni/command/Makefile

-# Builds the pinyinime_dictbuilder command-line tool from the sources in
-# ../share plus pinyinime_dictbuilder.cpp.
-#
-# Variable names are kept as they were so existing command-line overrides
-# (e.g. `make CPP=clang++`) keep working. Note that CPP is used here as the
-# C++ compiler and CPPFLAGS as its flags.
-
-# If the link step fails, do not leave a partial binary that looks up to date.
-.DELETE_ON_ERROR:
-
-CC=gcc
-CFLAGS= -g -Wall -std=c99
-CPP=g++
-CPPFLAGS= -g3 -Wall
-# Libraries go after the sources on the link line.
-LDLIBS= -lpthread
-
-PINYINIME_DICTBUILDER=pinyinime_dictbuilder
-
-LIBRARY_SRC= \
-	    ../share/dictbuilder.cpp \
-	    ../share/dictlist.cpp \
-	    ../share/dicttrie.cpp \
-	    ../share/lpicache.cpp \
-	    ../share/mystdlib.cpp \
-	    ../share/ngram.cpp \
-	    ../share/searchutility.cpp \
-	    ../share/spellingtable.cpp \
-	    ../share/spellingtrie.cpp \
-	    ../share/splparser.cpp \
-	    ../share/utf16char.cpp \
-	    ../share/utf16reader.cpp
-
-all: engine
-
-engine: $(PINYINIME_DICTBUILDER)
-
-# $^ (all prerequisites) is required here. $? expands only to the
-# prerequisites newer than the target, so after editing one file the link
-# would be attempted with that file alone.
-$(PINYINIME_DICTBUILDER): $(LIBRARY_SRC) pinyinime_dictbuilder.cpp
-	$(CPP) $(CPPFLAGS) -o $@ $^ $(LDLIBS)
-
-
-clean:
-	$(RM) $(PINYINIME_DICTBUILDER)
-
-.PHONY: all engine clean

PinyinIME/jni/command/pinyinime_dictbuilder.cpp

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-#include <unistd.h>
-#include "../include/dicttrie.h"
-
-using namespace ime_pinyin;
-
-/**
- * Build binary dictionary model. Make sure that ___BUILD_MODEL___ is defined
- * in dictdef.h.
- *
- * Usage: pinyinime_dictbuilder [raw_dict_file valid_chars_file]
- * When fewer than two arguments are given, the default files under ../data
- * are used. The result is always written to ../../res/raw/dict_pinyin.dat.
- *
- * @return 0 on success, -1 if building or saving the dictionary fails.
- */
-int main(int argc, char* argv[]) {
-  DictTrie* dict_trie = new DictTrie();
-  bool success;
-  if (argc >= 3)
-     success = dict_trie->build_dict(argv[1], argv[2]);
-  else
-     success = dict_trie->build_dict("../data/rawdict_utf16_65105_freq.txt",
-                                     "../data/valid_utf16.txt");
-
-  if (success) {
-    printf("Build dictionary successfully.\n");
-  } else {
-    printf("Build dictionary unsuccessfully.\n");
-    delete dict_trie;  // release the trie on every exit path
-    return -1;
-  }
-
-  success = dict_trie->save_dict("../../res/raw/dict_pinyin.dat");
-  delete dict_trie;
-
-  if (success) {
-    printf("Save dictionary successfully.\n");
-  } else {
-    printf("Save dictionary unsuccessfully.\n");
-    return -1;
-  }
-
-  return 0;
-}

PinyinIME/jni/data/rawdict_utf16_65105_freq.txt

Binary file removed.

PinyinIME/jni/data/valid_utf16.txt

Binary file removed.

PinyinIME/jni/include/atomdictbase.h

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This file defines the AtomDictBase class, which is the base class for all atom
- * dictionaries. Atom dictionaries are managed by the decoder class
- * MatrixSearch.
- *
- * When the user appends a new character to the Pinyin string, all enabled atom
- * dictionaries' extend_dict() will be called at least once to get candidates
- * ended in this step (the information of starting step is also given in the
- * parameter). Usually, when extend_dict() is called, a MileStoneHandle object
- * returned by a previous call for an earlier step is given to speed up the
- * look-up process, and a new MileStoneHandle object will be returned if
- * the extension is successful.
- *
- * A returned MileStoneHandle object should keep alive until Function
- * reset_milestones() is called and this object is noticed to be reset.
- *
- * Usually, the atom dictionary can use step information to manage its
- * MileStoneHandle objects, or it can make the objects in ascendant order to
- * make the reset easier.
- *
- * When the decoder loads the dictionary, it will give a starting lemma id for
- * this atom dictionary to map an inner id to a global id. Global ids should be
- * used when an atom dictionary talks to any component outside.
- */
-#ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__
-#define PINYINIME_INCLUDE_ATOMDICTBASE_H__
-
-#include <stdlib.h>
-#include "./dictdef.h"
-#include "./searchutility.h"
-
-namespace ime_pinyin {
-class AtomDictBase {
- public:
-  virtual ~AtomDictBase() {}
-
-  /**
-   * Load an atom dictionary from a file.
-   *
-   * @param file_name The file name to load dictionary.
-   * @param start_id The starting id used for this atom dictionary.
-   * @param end_id The end id (included) which can be used for this atom
-   * dictionary. User dictionary will always use the last id space, so it can
-   * ignore this parameter. All other atom dictionaries should check this
-   * parameter.
-   * @return True if succeed.
-   */
-  virtual bool load_dict(const char *file_name, LemmaIdType start_id,
-                         LemmaIdType end_id) = 0;
-
-  /**
-   * Close this atom dictionary.
-   *
-   * @return True if succeed.
-   */
-  virtual bool close_dict() = 0;
-
-  /**
-   * Get the total number of lemmas in this atom dictionary.
-   *
-   * @return The total number of lemmas.
-   */
-  virtual size_t number_of_lemmas() = 0;
-
-  /**
-   * This function is called by the decoder when user deletes a character from
-   * the input string, or begins a new input string.
-   *
-   * Different atom dictionaries may implement this function in different ways.
-   * An atom dictionary can use one of these two parameters (or both) to reset
-   * its corresponding MileStoneHandle objects according to its detailed
-   * implementation.
-   *
-   * For example, if an atom dictionary uses step information to manage its
-   * MileStoneHandle objects, parameter from_step can be used to identify which
-   * objects should be reset; otherwise, if another atom dictionary does not
-   * use the detailed step information, it only uses ascendant handles
-   * (according to step. For the same step, earlier call, smaller handle), it
-   * can easily reset those MileStoneHandle which are larger than from_handle.
-   *
-   * The decoder always resets the decoding state by step. So when it begins
-   * resetting, it will call reset_milestones() of its atom dictionaries with
-   * the step information, and the MileStoneHandle objects returned by the
-   * earliest calling of extend_dict() for that step.
-   *
-   * If an atom dictionary does not implement incremental search, this function
-   * can be totally ignored.
-   *
-   * @param from_step From which step(included) the MileStoneHandle
-   * objects should be reset.
-   * @param from_handle The earliest MileStoneHandle object for step from_step.
-   */
-  virtual void reset_milestones(uint16 from_step,
-                                MileStoneHandle from_handle) = 0;
-
-  /**
-   * Used to extend in this dictionary. The handle returned should keep valid
-   * until reset_milestones() is called.
-   *
-   * @param from_handle Its previous returned extended handle without the new
-   * spelling id, it can be used to speed up the extending.
-   * @param dep The parameter used for extending.
-   * @param lpi_items Used to fill in the lemmas matched.
-   * @param lpi_max The length of the buffer
-   * @param lpi_num Used to return the newly added items.
-   * @return The new mile stone for this extending. 0 if fail.
-   */
-  virtual MileStoneHandle extend_dict(MileStoneHandle from_handle,
-                                      const DictExtPara *dep,
-                                      LmaPsbItem *lpi_items,
-                                      size_t lpi_max, size_t *lpi_num) = 0;
-
-  /**
-   * Get lemma items with scores according to a spelling id stream.
-   * This atom dictionary does not need to sort the returned items.
-   *
-   * @param splid_str The spelling id stream buffer.
-   * @param splid_str_len The length of the spelling id stream buffer.
-   * @param lpi_items Used to return matched lemma items with scores.
-   * @param lpi_max The maximum size of the buffer to return result.
-   * @return The number of matched items which have been filled in to lpi_items.
-   */
-  virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
-                          LmaPsbItem *lpi_items, size_t lpi_max) = 0;
-
-  /**
-   * Get a lemma string (The Chinese string) by the given lemma id.
-   *
-   * @param id_lemma The lemma id to get the string.
-   * @param str_buf The buffer to return the Chinese string.
-   * @param str_max The maximum size of the buffer.
-   * @return The length of the string, 0 if fail.
-   */
-  virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
-                               uint16 str_max) = 0;
-
-  /**
-   * Get the full spelling ids for the given lemma id.
-   * If the given buffer is too short, return 0.
-   *
-   * @param splids Used to return the spelling ids.
-   * @param splids_max The maximum buffer length of splids.
-   * @param arg_valid Used to indicate if the incoming parameters have been
-   * initialized and are valid. If it is true, the splids and splids_max are valid
-   * and there may be half ids in splids to be updated to full ids. In this
-   * case, splids_max is the number of valid ids in splids.
-   * @return The number of ids in the buffer.
-   */
-  virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
-                                  uint16 splids_max, bool arg_valid) = 0;
-
-  /**
-   * Function used for prediction.
-   * No need to sort the newly added items.
-   *
-   * @param last_hzs The last n Chinese characters (called Hanzi), its length
-   * should be less than or equal to kMaxPredictSize.
-   * @param hzs_len specifies the length(<= kMaxPredictSize) of the history.
-   * @param npre_items Used to return the result.
-   * @param npre_max The length of the buffer to return result
-   * @param b4_used Number of prediction result (from npre_items[-b4_used])
-   * from other atom dictionaries. An atom dictionary can just ignore it.
-   * @return The number of prediction result from this atom dictionary.
-   */
-  virtual size_t predict(const char16 last_hzs[], uint16 hzs_len,
-                         NPredictItem *npre_items, size_t npre_max,
-                         size_t b4_used) = 0;
-
-  /**
-   * Add a lemma to the dictionary. If the dictionary allows to add new
-   * items and this item does not exist, add it.
-   *
-   * @param lemma_str The Chinese string of the lemma.
-   * @param splids The spelling ids of the lemma.
-   * @param lemma_len The length of the Chinese lemma.
-   * @param count The frequency count for this lemma.
-   */
-  virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
-                                uint16 lemma_len, uint16 count) = 0;
-
-  /**
-   * Update a lemma's occurring count.
-   *
-   * @param lemma_id The lemma id to update.
-   * @param delta_count The frequency count to adjust.
-   * @param selected Indicate whether this lemma is selected by user and
-   * submitted to target edit box.
-   * @return The id if succeed, 0 if fail.
-   */
-  virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
-                                   bool selected) = 0;
-
-  /**
-   * Get the lemma id for the given lemma.
-   *
-   * @param lemma_str The Chinese string of the lemma.
-   * @param splids The spelling ids of the lemma.
-   * @param lemma_len The length of the lemma.
-   * @return The matched lemma id, or 0 if fail.
-   */
-  virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
-                                   uint16 lemma_len) = 0;
-
-  /**
-   * Get the score of the lemma identified by its id.
-   *
-   * @param lemma_id The id of the lemma whose score is requested.
-   * @return The score of the lemma, or 0 on failure.
-   */
-  virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0;
-
-  /**
-   * Get the score of the lemma identified by its string and spelling ids.
-   *
-   * @param lemma_str The Chinese string of the lemma.
-   * @param splids The spelling ids of the lemma.
-   * @param lemma_len The length of the lemma.
-   * @return The score of the lemma, or 0 on failure.
-   */
-  virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
-                                uint16 lemma_len) = 0;
-
-  /**
-   * If the dictionary allows it, remove a lemma from the dictionary.
-   *
-   * @param lemma_id The id of the lemma to remove.
-   * @return True on success.
-   */
-  virtual bool remove_lemma(LemmaIdType lemma_id) = 0;
-
-  /**
-   * Get the total occurrence count of this atom dictionary.
-   *
-   * @return The total occurrence count of this atom dictionary.
-   */
-  virtual size_t get_total_lemma_count() = 0;
-
-  /**
-   * Set the total occurrence count of the other atom dictionaries.
-   *
-   * @param count The total occurrence count of the other atom dictionaries.
-   */
-  virtual void set_total_lemma_count_of_others(size_t count) = 0;
-
-  /**
-   * Notify this atom dictionary that it should flush its cached data to
-   * persistent storage if necessary.
-   */
-  virtual void flush_cache() = 0;
-};
-}
-
-#endif  // PINYINIME_INCLUDE_ATOMDICTBASE_H__

PinyinIME/jni/include/dictbuilder.h

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTBUILDER_H__
-#define PINYINIME_INCLUDE_DICTBUILDER_H__
-
-#include <stdlib.h>
-#include "./utf16char.h"
-#include "./dictdef.h"
-#include "./dictlist.h"
-#include "./spellingtable.h"
-#include "./spellingtrie.h"
-#include "./splparser.h"
-
-namespace ime_pinyin {
-
-#ifdef ___BUILD_MODEL___
-
-#define ___DO_STATISTICS___
-
-class DictTrie;
-
-class DictBuilder {  // Builds a DictTrie from a raw dictionary file.
- private:
-  // The raw lemma array buffer.
-  LemmaEntry *lemma_arr_;
-  size_t lemma_num_;
-
-  // Used to store all possible single char items.
-  // Two items may have the same Hanzi while their spelling ids are different.
-  SingleCharItem *scis_;
-  size_t scis_num_;
-
-  // In the tree, root's level is -1.
-  // Lemma nodes for root, and level 0
-  LmaNodeLE0 *lma_nodes_le0_;
-
-  // Lemma nodes for layers whose levels are deeper than 0
-  LmaNodeGE1 *lma_nodes_ge1_;
-
-  // Number of used lemma nodes
-  size_t lma_nds_used_num_le0_;
-  size_t lma_nds_used_num_ge1_;
-
-  // Used to store the ids of homophones.
-  LemmaIdType *homo_idx_buf_;
-  // Number of homophones each of which only contains one Chinese character.
-  size_t homo_idx_num_eq1_;
-  // Number of homophones each of which contains more than one character.
-  size_t homo_idx_num_gt1_;
-
-  // The items with highest scores.
-  LemmaEntry *top_lmas_;
-  size_t top_lmas_num_;
-
-  SpellingTable *spl_table_;
-  SpellingParser *spl_parser_;
-
-#ifdef ___DO_STATISTICS___
-  size_t max_sonbuf_len_[kMaxLemmaSize];
-  size_t max_homobuf_len_[kMaxLemmaSize];
-
-  size_t total_son_num_[kMaxLemmaSize];
-  size_t total_node_hasson_[kMaxLemmaSize];
-  size_t total_sonbuf_num_[kMaxLemmaSize];
-  size_t total_sonbuf_allnoson_[kMaxLemmaSize];
-  size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize];
-  size_t total_homo_num_[kMaxLemmaSize];
-
-  size_t sonbufs_num1_;     // Number of son buffers with only 1 son
-  size_t sonbufs_numgt1_;   // Number of son buffers with more than 1 son
-
-  size_t total_lma_node_num_;
-
-  void stat_init();
-  void stat_print();
-#endif
-
- public:
-
-  DictBuilder();
-  ~DictBuilder();
-
-  // Build dictionary trie from the file fn_raw. File fn_validhzs provides
-  // valid chars. If fn_validhzs is NULL, only chars in GB2312 will be
-  // included.
-  bool build_dict(const char* fn_raw, const char* fn_validhzs,
-                  DictTrie *dict_trie);
-
- private:
-  // Fill in the buffer with id. The caller guarantees that the parameters are
-  // valid.
-  void id_to_charbuf(unsigned char *buf, LemmaIdType id);
-
-  // Update the offset of sons for a node.
-  void set_son_offset(LmaNodeGE1 *node, size_t offset);
-
-  // Update the offset of the ids of homophones for a node.
-  void set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset);
-
-  // Format a spelling string.
-  void format_spelling_str(char *spl_str);
-
-  // Sort the lemma_arr by the hanzi string, and give each unique item
-  // an id. The lemma list is sorted by Hanzi string so that items starting
-  // with a given prefix string can be found to do prediction.
-  // Actually, the single char items may be in another order, for example,
-  // in spelling id order, etc.
-  // Return value is the next un-allocated idx available.
-  LemmaIdType sort_lemmas_by_hz();
-
-  // Build the SingleCharItem list, and fill the hanzi_scis_ids in the
-  // lemma buffer lemma_arr_.
-  // This function should be called after the lemma array is ready.
-  // Return the number of unique SingleCharItem elements.
-  size_t build_scis();
-
-  // Construct a subtree using a subset of the spelling array (from
-  // item_start to item_end).
-  // parent is the parent node to update with the necessary information;
-  // parent can be a member of LmaNodeLE0 or LmaNodeGE1.
-  bool construct_subset(void* parent, LemmaEntry* lemma_arr,
-                        size_t item_start, size_t item_end, size_t level);
-
-
-  // Read valid Chinese Hanzis from the given file.
-  // num is used to return number of chars.
-  // The return buffer is sorted and caller needs to free the returned buffer.
-  char16* read_valid_hanzis(const char *fn_validhzs, size_t *num);
-
-
-  // Read a raw dictionary. max_item is the maximum number of items. If there
-  // are more items in the dictionary, only the first max_item will be read.
-  // Returned value is the number of items successfully read from the file.
-  size_t read_raw_dict(const char* fn_raw, const char *fn_validhzs,
-                       size_t max_item);
-
-  // Try to find if a character is in hzs buffer.
-  bool hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz);
-
-  // Try to find if all characters in str are in hzs buffer.
-  bool str_in_hanzis_list(const char16 *hzs, size_t hzs_len,
-                          const char16 *str, size_t str_len);
-
-  // Get the lemmas with the highest scores.
-  void get_top_lemmas();
-
-  // Allocate resource to build dictionary.
-  // lma_num is the number of items to be loaded
-  bool alloc_resource(size_t lma_num);
-
-  // Free resource.
-  void free_resource();
-};
-#endif  // ___BUILD_MODEL___
-}
-
-#endif  // PINYINIME_INCLUDE_DICTBUILDER_H__

PinyinIME/jni/include/dictdef.h

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTDEF_H__
-#define PINYINIME_INCLUDE_DICTDEF_H__
-
-#include <stdlib.h>
-#include "./utf16char.h"
-
-namespace ime_pinyin {
-
-// Enable the following line when building the binary dictionary model.
-// #define ___BUILD_MODEL___
-
-typedef unsigned char      uint8;
-typedef unsigned short     uint16;
-typedef unsigned int       uint32;
-
-typedef signed char        int8;
-typedef short              int16;
-typedef int                int32;
-typedef long long          int64;
-typedef unsigned long long uint64;
-
-const bool kPrintDebug0 = false;
-const bool kPrintDebug1 = false;
-const bool kPrintDebug2 = false;
-
-// The max length of a lemma.
-const size_t kMaxLemmaSize = 8;
-
-// The max length of a Pinyin (spelling).
-const size_t kMaxPinyinSize = 6;
-
-// The number of half spelling ids. For Chinese Pinyin, there are 30 half ids.
-// See SpellingTrie.h for details. NOTE(review): the value below is 29 - confirm.
-const size_t kHalfSpellingIdNum = 29;
-
-// The maximum number of full spellings. For Chinese Pinyin, there are only
-// about 410 spellings.
-// If this value is changed to a bigger one (needing more bits), please also
-// update other structures like SpellingNode, to make sure that a spelling id
-// can be stored.
-// The -1 is because 0 is never used.
-const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
-const size_t kMaxSearchSteps = 40;
-
-// One character predicts its following characters.
-const size_t kMaxPredictSize = (kMaxLemmaSize - 1);
-
-// LemmaIdType must always be size_t.
-typedef size_t LemmaIdType;
-const size_t kLemmaIdSize = 3;  // Actually, an Id occupies 3 bytes in storage.
-const size_t kLemmaIdComposing = 0xffffff;
-
-typedef uint16 LmaScoreType;
-typedef uint16 KeyScoreType;
-
-// The number of highest-scoring items that are kept for prediction purposes.
-const size_t kTopScoreLemmaNum = 10;
-
-const size_t kMaxPredictNumByGt3 = 1;
-const size_t kMaxPredictNumBy3 = 2;
-const size_t kMaxPredictNumBy2 = 2;
-
-// The last lemma id (included) for the system dictionary. The system
-// dictionary's ids always start from 1.
-const LemmaIdType kSysDictIdEnd = 500000;
-
-// The first lemma id for the user dictionary.
-const LemmaIdType kUserDictIdStart = 500001;
-
-// The last lemma id (included) for the user dictionary.
-const LemmaIdType kUserDictIdEnd = 600000;
-
-typedef struct {         // A spelling id split into two bit-fields (16 bits).
-  uint16 half_splid:5;   // 5-bit field for the half spelling id.
-  uint16 full_splid:11;  // 11-bit field for the full spelling id.
-} SpellingId, *PSpellingId;
-
-
-/**
- * We use different node types for different layers
- * Statistical data of the building result for a testing dictionary:
- *                              root,   level 0,   level 1,   level 2,   level 3
- * max son num of one node:     406        280         41          2          -
- * max homo num of one node:      0         90         23          2          2
- * total node num of a layer:     1        406      31766      13516        993
- * total homo num of a layer:     9       5674      44609      12667        995
- *
- * The node number for root and level 0 won't be larger than 500
- * According to the information above, two kinds of nodes can be used; one for
- * root and level 0, the other for these layers deeper than 0.
- *
- * LE = less than or equal,
- * A node occupies 16 bytes, so in total less than 16 * 500 = 8K.
- * NOTE(review): 16 bytes assumes a 4-byte size_t (4 + 4 + 2 + 2 + 2 = 14,
- * padded to 16); with an 8-byte size_t the node is larger - confirm.
- */
-struct LmaNodeLE0 {
-  size_t son_1st_off;
-  size_t homo_idx_buf_off;
-  uint16 spl_idx;
-  uint16 num_of_son;
-  uint16 num_of_homo;
-};
-
-/**
- * GE = greater than or equal
- * A node occupies 8 bytes.
- * NOTE(review): the fields below add up to 10 bytes (3 x uint16 + 4 x
- * unsigned char, assuming a 2-byte uint16), not 8 - confirm.
- */
-struct LmaNodeGE1 {
-  uint16 son_1st_off_l;        // Low bits of the son_1st_off
-  uint16 homo_idx_buf_off_l;   // Low bits of the homo_idx_buf_off
-  uint16 spl_idx;
-  unsigned char num_of_son;            // number of son nodes
-  unsigned char num_of_homo;           // number of homo words
-  unsigned char son_1st_off_h;         // high bits of the son_1st_off
-  unsigned char homo_idx_buf_off_h;    // high bits of the homo_idx_buf_off
-};
-
-#ifdef ___BUILD_MODEL___
-struct SingleCharItem {  // One single-character item (build-model only).
-  float freq;            // presumably the frequency of this item - confirm.
-  char16 hz;             // presumably the Hanzi character - confirm.
-  SpellingId splid;      // Spelling id (half/full bit-field pair).
-};
-
-struct LemmaEntry {  // One raw lemma record (build-model only).
-  LemmaIdType idx_by_py;  // presumably the id in Pinyin order - confirm.
-  LemmaIdType idx_by_hz;  // presumably the id in Hanzi order - confirm.
-  char16 hanzi_str[kMaxLemmaSize + 1];  // +1 presumably for a terminator.
-
-  // The SingleCharItem id for each Hanzi.
-  uint16 hanzi_scis_ids[kMaxLemmaSize];
-
-  uint16 spl_idx_arr[kMaxLemmaSize + 1];
-  // One spelling string per character; +1 presumably for a terminator.
-  char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1];
-  unsigned char hz_str_len;  // presumably the length of hanzi_str - confirm.
-  float freq;
-};
-#endif  // ___BUILD_MODEL___
-
-}  //  namespace ime_pinyin
-
-#endif  // PINYINIME_INCLUDE_DICTDEF_H__

PinyinIME/jni/include/dictlist.h

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTLIST_H__
-#define PINYINIME_INCLUDE_DICTLIST_H__
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "./dictdef.h"
-#include "./searchutility.h"
-#include "./spellingtrie.h"
-#include "./utf16char.h"
-
-namespace ime_pinyin {
-
-class DictList {  // Word list with id <-> Hanzi string lookup and prediction.
- private:
-  bool initialized_;
-
-  const SpellingTrie *spl_trie_;
-
-  // Number of SingleCharItem. The first is blank, because id 0 is invalid.
-  size_t scis_num_;
-  char16 *scis_hz_;
-  SpellingId *scis_splid_;
-
-  // The large memory block to store the word list.
-  char16 *buf_;
-
-  // Starting position of those words whose lengths are i+1, counted in
-  // char16
-  size_t start_pos_[kMaxLemmaSize + 1];
-
-  size_t start_id_[kMaxLemmaSize + 1];
-
-  int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
-
-  bool alloc_resource(size_t buf_size, size_t scim_num);
-
-  void free_resource();
-
-#ifdef ___BUILD_MODEL___
-  // Calculate the requested memory, including the start_pos[] buffer.
-  size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num);
-
-  void fill_scis(const SingleCharItem *scis, size_t scis_num);
-
-  // Copy the related content to the inner buffer
-  // It should be called after calculate_size()
-  void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num);
-
-  // Find the starting position for the buffer of those 2-character Chinese
-  // words whose first character is the given Chinese character.
-  char16* find_pos2_startedbyhz(char16 hz_char);
-#endif
-
-  // Find the starting position for the buffer of those words whose lengths are
-  // word_len. The given parameter cmp_func decides how many characters from
-  // beginning will be used to compare.
-  char16* find_pos_startedbyhzs(const char16 last_hzs[],
-                                size_t word_Len,
-                                int (*cmp_func)(const void *, const void *));
-
- public:
-
-  DictList();
-  ~DictList();
-
-  bool save_list(FILE *fp);
-  bool load_list(FILE *fp);
-
-#ifdef ___BUILD_MODEL___
-  // Init the list from the LemmaEntry array.
-  // lemma_arr should have been sorted by the hanzi_str, and have been given
-  // ids from 1
-  bool init_list(const SingleCharItem *scis, size_t scis_num,
-                 const LemmaEntry *lemma_arr, size_t lemma_num);
-#endif
-
-  // Get the hanzi string for the given id
-  uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
-
-  void convert_to_hanzis(char16 *str, uint16 str_len);
-
-  void convert_to_scis_ids(char16 *str, uint16 str_len);
-
-  // last_hzs stores the last n Chinese characters history, its length should be
-  // less than or equal to kMaxPredictSize.
-  // hzs_len specifies the length(<= kMaxPredictSize).
-  // npre_items is used to store the result.
-  // npre_max specifies the buffer length.
-  // b4_used specifies how many items before npre_items have been used.
-  // Returned value is the number of newly added items.
-  size_t predict(const char16 last_hzs[], uint16 hzs_len,
-                 NPredictItem *npre_items, size_t npre_max,
-                 size_t b4_used);
-
-  // If half_splid is a valid half spelling id, return those full spelling
-  // ids which share this half id.
-  uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
-                              uint16 *splids, uint16 max_splids);
-
-  LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
-};
-}
-
-#endif  // PINYINIME_INCLUDE_DICTLIST_H__

PinyinIME/jni/include/dicttrie.h

-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTTRIE_H__
-#define PINYINIME_INCLUDE_DICTTRIE_H__
-
-#include <stdlib.h>
-#include "./atomdictbase.h"
-#include "./dictdef.h"
-#include "./dictlist.h"
-#include "./searchutility.h"
-
-namespace ime_pinyin {
-
-class DictTrie : AtomDictBase {
- private:
-  typedef struct ParsingMark {
-    size_t node_offset:24;
-    size_t node_num:8;           // Number of nodes with this spelling id given
-                                 // by spl_id. If spl_id is a Shengmu, for nodes
-                                 // in the first layer of DictTrie, it equals to
-                                 // SpellingTrie::shm2full_num(); but for those
-                                 // nodes which are not in the first layer,
-                                 // node_num < SpellingTrie::shm2full_num().
-                                 // For a full spelling id, node_num = 1;
-  };
-
-  // Used to indicate an extended mile stone.
-  // An extended mile stone is used to mark a partial match in the dictionary
-  // trie to speed up further potential extending.
-  // For example, when the user inputs "w", a mile stone is created to mark the
-  // partial match status, so that when user inputs another char 'm', it will be
-  // faster to extend search space based on this mile stone.
-  //
-  // For partial match status of "wm", there can be more than one sub mile
-  // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so
-  // there may be more one parsing mark used to mark these partial matchings.
-  // A mile stone records the starting position in the mark list and number of
-  // marks.
-  struct MileStone {
-    uint16 mark_start;
-    uint16 mark_num;