Commits

Patrick Mézard committed ccd1cd6 Merge

Merge with r510

Comments (0)

Files changed (7)

cpp/CMakeLists.txt

   "src/phonenumbers/geocoding/geocoding_data.cc"
   "src/phonenumbers/geocoding/mapping_file_provider.cc"
   "src/phonenumbers/logger.cc"
-  "src/phonenumbers/metadata.h"          # Generated by build tools.
   "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
   "src/phonenumbers/phonenumber.cc"
   "src/phonenumbers/phonenumber.pb.cc"   # Generated by Protocol Buffers.
   add_definitions (-DUSE_ICU_REGEXP)
   list (APPEND SOURCES "src/phonenumbers/regexp_adapter_icu.cc")
   # The phone number matcher needs ICU.
+  list (APPEND SOURCES "src/phonenumbers/alternate_format.cc")
   list (APPEND SOURCES "src/phonenumbers/phonenumbermatch.cc")
   list (APPEND SOURCES "src/phonenumbers/phonenumbermatcher.cc")
 endif ()
 # code generation targets.
 function (add_metadata_gen_target TARGET_NAME
                                   XML_FILE
-                                  METADATA_TYPE)
+                                  METADATA_TYPE
+                                  METADATA_HEADER)
   set (METADATA_SOURCE_DIR "${CMAKE_SOURCE_DIR}/src/phonenumbers")
   set (GEN_OUTPUT "${METADATA_SOURCE_DIR}/${METADATA_TYPE}.cc"
-                  "${METADATA_SOURCE_DIR}/metadata.h")
+                  "${METADATA_SOURCE_DIR}/${METADATA_HEADER}.h")
   set (JAR_PATH "${CMAKE_SOURCE_DIR}/../tools/java/cpp-build/target")
   set (JAR_PATH "${JAR_PATH}/cpp-build-1.0-SNAPSHOT-jar-with-dependencies.jar")
 
     ${METADATA_TARGET}
     "${RESOURCES_DIR}/PhoneNumberMetaData.xml"
     "lite_metadata"
+    "metadata"
   )
   list (APPEND SOURCES "src/phonenumbers/lite_metadata.cc")
 else ()
     ${METADATA_TARGET}
     "${RESOURCES_DIR}/PhoneNumberMetaData.xml"
     "metadata"
+    "metadata"
   )
   list (APPEND SOURCES "src/phonenumbers/metadata.cc")
 endif ()
   ${TEST_METADATA_TARGET}
   "${RESOURCES_DIR}/PhoneNumberMetaDataForTesting.xml"
   "test_metadata"
+  "metadata"
 )
 list (APPEND TESTING_LIBRARY_SOURCES "src/phonenumbers/test_metadata.cc")
 
+if ("${USE_ICU_REGEXP}" STREQUAL "ON")
+  # Add alternate format metadata generation for the phone number matcher.
+  # The comparison operand is quoted so that an empty or unset
+  # USE_ICU_REGEXP cannot turn the condition into a syntax error.
+  set (ALT_FORMAT_METADATA_TARGET "generate-alt-format-metadata")
+  add_metadata_gen_target (
+    ${ALT_FORMAT_METADATA_TARGET}
+    "${RESOURCES_DIR}/PhoneNumberAlternateFormats.xml"
+    "alternate_format"  # METADATA_TYPE: base name of the generated .cc file.
+    "alternate_format"  # METADATA_HEADER: base name of the generated .h file.
+  )
+endif ()
+
 if (NOT WIN32)
   add_definitions ("-Wall -Werror")
 endif ()
 # Build a static library (without -fPIC).
 add_library (phonenumber STATIC ${SOURCES})
 add_dependencies (phonenumber generate-sources ${METADATA_TARGET})
+if ("${USE_ICU_REGEXP}" STREQUAL "ON")
+  # The alternate format generation target only exists for ICU builds, so the
+  # dependency is added under the same condition. The operand is quoted so an
+  # empty USE_ICU_REGEXP cannot break the condition.
+  add_dependencies (phonenumber ${ALT_FORMAT_METADATA_TARGET})
+endif ()
 
 # Build a shared library (with -fPIC).
 set (BUILD_SHARED_LIB true)
 if (BUILD_SHARED_LIB)
   add_library (phonenumber-shared SHARED ${SOURCES})
   add_dependencies (phonenumber-shared generate-sources ${METADATA_TARGET})
+  if ("${USE_ICU_REGEXP}" STREQUAL "ON")
+    # The shared library is built from the same SOURCES as the static one, so
+    # it needs its own dependency on the alternate format generation target;
+    # the dependency must name phonenumber-shared, not phonenumber.
+    add_dependencies (phonenumber-shared ${ALT_FORMAT_METADATA_TARGET})
+  endif ()
   set_target_properties (phonenumber-shared
     PROPERTIES
       OUTPUT_NAME "phonenumber"

cpp/src/phonenumbers/phonenumbermatcher.cc

 #endif  // USE_ICU_REGEXP
 
 #include <ctype.h>
+#include <map>
+#include <iostream>
 #include <limits>
 #include <stddef.h>
 #include <string>
 #include "base/logging.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/memory/singleton.h"
+#include "phonenumbers/alternate_format.h"
 #include "phonenumbers/callback.h"
 #include "phonenumbers/default_logger.h"
 #include "phonenumbers/encoding_utils.h"
 #include "phonenumbers/regexp_adapter_re2.h"
 #endif  // USE_RE2_AND_ICU
 
+using std::cerr;
+using std::endl;
+using std::make_pair;
+using std::map;
 using std::numeric_limits;
 using std::string;
 using std::vector;
     return normalized_candidate.substr(from_index)
         .find(phone_number.extension()) != string::npos;
 }
+
+// Parses the binary data exposed by alternate_format_get() and
+// alternate_format_size() into |alternate_formats|. Returns true on success;
+// on failure reports the problem on stderr and returns false.
+bool LoadAlternateFormats(PhoneMetadataCollection* alternate_formats) {
+  const bool parsed = alternate_formats->ParseFromArray(
+      alternate_format_get(), alternate_format_size());
+  if (parsed) {
+    return true;
+  }
+  cerr << "Could not parse binary data." << endl;
+  return false;
+}
 }  // namespace
 
 #ifdef USE_GOOGLE_BASE
   DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcherRegExps);
 };
 
+// Singleton holding the alternate format metadata, indexed by country calling
+// code. The data is loaded once, when the instance is constructed.
+#ifdef USE_GOOGLE_BASE
+class AlternateFormats {
+  friend struct DefaultSingletonTraits<AlternateFormats>;
+#else
+class AlternateFormats : public Singleton<AlternateFormats> {
+  friend class Singleton<AlternateFormats>;
+#endif  // USE_GOOGLE_BASE
+ public:
+  // Parsed metadata collection; the map below points into its entries.
+  PhoneMetadataCollection format_data_;
+
+  // Maps a country calling code to its entry in format_data_.
+  map<int, const PhoneMetadata*> calling_code_to_alternate_formats_map_;
+
+#ifdef USE_GOOGLE_BASE
+  static AlternateFormats* GetInstance() {
+    return Singleton<AlternateFormats>::get();
+  }
+#endif  // USE_GOOGLE_BASE
+
+  // Loads the metadata and builds the calling-code index. When loading fails
+  // the failure is logged and the index is left empty.
+  AlternateFormats()
+      : format_data_(),
+        calling_code_to_alternate_formats_map_() {
+    if (!LoadAlternateFormats(&format_data_)) {
+      LOG(DFATAL) << "Could not parse compiled-in metadata.";
+      return;
+    }
+    const RepeatedPtrField<PhoneMetadata>& entries = format_data_.metadata();
+    RepeatedPtrField<PhoneMetadata>::const_iterator entry = entries.begin();
+    while (entry != entries.end()) {
+      const PhoneMetadata* metadata = &*entry;
+      // insert() keeps the first entry seen for a given calling code.
+      calling_code_to_alternate_formats_map_.insert(
+          make_pair(metadata->country_code(), metadata));
+      ++entry;
+    }
+  }
+
+  // Returns the alternate formats registered for |country_calling_code|, or
+  // NULL when there are none.
+  const PhoneMetadata* GetAlternateFormatsForCountry(int country_calling_code)
+      const {
+    map<int, const PhoneMetadata*>::const_iterator found =
+        calling_code_to_alternate_formats_map_.find(country_calling_code);
+    if (found == calling_code_to_alternate_formats_map_.end()) {
+      return NULL;
+    }
+    return found->second;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AlternateFormats);
+};
+
 PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
                                        const string& text,
                                        const string& region_code,
                                        PhoneNumberMatcher::Leniency leniency,
                                        int max_tries)
     : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
+      alternate_formats_(AlternateFormats::GetInstance()),
       phone_util_(util),
       text_(text),
       preferred_region_(region_code),
 PhoneNumberMatcher::PhoneNumberMatcher(const string& text,
                                        const string& region_code)
     : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
+      alternate_formats_(NULL),  // Not used.
       phone_util_(*PhoneNumberUtil::GetInstance()),
       text_(text),
       preferred_region_(region_code),
                    formatted_number_groups)) {
     return true;
   }
+  // If this didn't pass, see if there are any alternate formats, and try them
+  // instead.
+  const PhoneMetadata* alternate_formats =
+    alternate_formats_->GetAlternateFormatsForCountry(
+        phone_number.country_code());
+  if (alternate_formats) {
+    for (RepeatedPtrField<NumberFormat>::const_iterator it =
+             alternate_formats->number_format().begin();
+         it != alternate_formats->number_format().end(); ++it) {
+      formatted_number_groups.clear();
+      GetNationalNumberGroups(phone_number, &*it, &formatted_number_groups);
+      if (checker->Run(phone_util_, phone_number, normalized_candidate,
+                       formatted_number_groups)) {
+        return true;
+      }
+    }
+  }
   return false;
 }
 

cpp/src/phonenumbers/phonenumbermatcher.h

 using std::string;
 using std::vector;
 
+class AlternateFormats;
 class NumberFormat;
 class PhoneNumber;
 class PhoneNumberMatch;
   // Helper class holding useful regular expressions.
   const PhoneNumberMatcherRegExps* reg_exps_;
 
+  // Helper class holding loaded data containing alternate ways phone numbers
+  // might be formatted for certain regions.
+  const AlternateFormats* alternate_formats_;
+
   // The phone number utility;
   const PhoneNumberUtil& phone_util_;
 

cpp/test/phonenumbers/geocoding/geocoding_data_test.cc

   ASSERT_EQ(2, desc_1_de->prefixes_size);
   const int32 expected_prefixes[] = {1201, 1650};
   const char* expected_descriptions[] = {
-    "New Jerse",
-    "Kalifornie",
+    "New Jersey",
+    "Kalifornien",
   };
   for (int i = 0; i < desc_1_de->prefixes_size; ++i) {
     EXPECT_EQ(expected_prefixes[i], desc_1_de->prefixes[i]);

cpp/test/phonenumbers/phonenumbermatcher_test.cc

       "\x2D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
   NumberTest("2012-0102 08", RegionCode::US()),  // Very strange formatting.
   NumberTest("2012-01-02 08", RegionCode::US()),
-  // Breakdown assistance number.
-  NumberTest("1800-10-10 22", RegionCode::AU()),
-};
+  // Breakdown assistance number with unexpected formatting.
+  NumberTest("1800-1-0-10 22", RegionCode::AU()),
+  NumberTest("030-3-2 23 12 34", RegionCode::DE()),
+  NumberTest("03 0 -3 2 23 12 34", RegionCode::DE()),
+  NumberTest("(0)3 0 -3 2 23 12 34", RegionCode::DE()),
+  NumberTest("0 3 0 -3 2 23 12 34", RegionCode::DE()),};
 
 // Strings with number-like things that should only be found up to and including
 // the "strict_grouping" leniency level.
   // Should be found by strict grouping but not exact grouping, as the last two
   // groups are formatted together as a block.
   NumberTest("0800-2491234", RegionCode::DE()),
+  // Doesn't match any formatting in the test file, but almost matches an
+  // alternate format (the last two groups have been squashed together here).
+  NumberTest("0900-1 123123", RegionCode::DE()),
+  NumberTest("(0)900-1 123123", RegionCode::DE()),
+  NumberTest("0 900-1 123123", RegionCode::DE()),
 };
 
 // Strings with number-like things that should be found at all levels.
   NumberTest("0494949 ext. 49", RegionCode::DE()),
   NumberTest("01 (33) 3461 2234", RegionCode::MX()),  // Optional NP present
   NumberTest("(33) 3461 2234", RegionCode::MX()),  // Optional NP omitted
+  // Breakdown assistance number with normal formatting.
+  NumberTest("1800-10-10 22", RegionCode::AU()),
+  // Doesn't match any formatting in the test file, but matches an alternate
+  // format exactly.
+  NumberTest("0900-1 123 123", RegionCode::DE()),
+  NumberTest("(0)900-1 123 123", RegionCode::DE()),
+  NumberTest("0 900-1 123 123", RegionCode::DE()),
 };
 
 TEST_F(PhoneNumberMatcherTest, MatchesWithPossibleLeniency) {

csharp/PhoneNumbers/Properties/AssemblyInfo.cs

 //
 // You can specify all the values or you can default the Build and Revision Numbers 
 // by using the '*' as shown below:
-// [assembly: AssemblyVersion("4.10.0.57")]
-[assembly: AssemblyVersion("4.10.0.57")]
-[assembly: AssemblyFileVersion("4.10.0.57")]
+// [assembly: AssemblyVersion("4.10.0.58")]
+[assembly: AssemblyVersion("4.10.0.58")]
+[assembly: AssemblyFileVersion("4.10.0.58")]

tools/cpp/src/cpp-build/generate_geocoding_data.cc

       continue;
     }
     --end;
-    if (*end != '\n') {
-      if (!feof(input)) {
-        // A line without LF can only happen at the end of file.
-        return false;
-      }
-    } else {
-      // Consume the LF.
-      --end;
+    if (*end != '\n' && !feof(input)) {
+      // A line without LF can only happen at the end of file.
+      return false;
     }
 
     // Trim and check for comments.