Commits

Anonymous committed b9fa7f6

CPP: Ported alternate phone number format support.

Comments (0)

Files changed (3)

cpp/src/phonenumbers/phonenumbermatcher.cc

 #endif  // USE_ICU_REGEXP
 
 #include <ctype.h>
+#include <map>
+#include <iostream>
 #include <limits>
 #include <stddef.h>
 #include <string>
 #include "base/logging.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/memory/singleton.h"
+#include "phonenumbers/alternate_format.h"
 #include "phonenumbers/callback.h"
 #include "phonenumbers/default_logger.h"
 #include "phonenumbers/encoding_utils.h"
 #include "phonenumbers/regexp_adapter_re2.h"
 #endif  // USE_RE2_AND_ICU
 
+using std::cerr;
+using std::endl;
+using std::make_pair;
+using std::map;
 using std::numeric_limits;
 using std::string;
 using std::vector;
     return normalized_candidate.substr(from_index)
         .find(phone_number.extension()) != string::npos;
 }
+
+// Parses the compiled-in alternate format data (as exposed by
+// alternate_format_get() and alternate_format_size()) into
+// |alternate_formats|. Returns true when parsing succeeds; otherwise writes a
+// message to cerr and returns false.
+bool LoadAlternateFormats(PhoneMetadataCollection* alternate_formats) {
+  const bool parsed = alternate_formats->ParseFromArray(
+      alternate_format_get(), alternate_format_size());
+  if (parsed) {
+    return true;
+  }
+  cerr << "Could not parse binary data." << endl;
+  return false;
+}
 }  // namespace
 
 #ifdef USE_GOOGLE_BASE
   DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcherRegExps);
 };
 
+// Singleton that owns the parsed alternate format metadata and an index from
+// country calling code to the metadata entry for that calling code.
+#ifdef USE_GOOGLE_BASE
+class AlternateFormats {
+  friend struct DefaultSingletonTraits<AlternateFormats>;
+#else
+class AlternateFormats : public Singleton<AlternateFormats> {
+  friend class Singleton<AlternateFormats>;
+#endif  // USE_GOOGLE_BASE
+ public:
+  // The parsed collection; the map below stores pointers into it.
+  PhoneMetadataCollection format_data_;
+
+  // Country calling code -> entry inside format_data_.
+  map<int, const PhoneMetadata*> calling_code_to_alternate_formats_map_;
+
+#ifdef USE_GOOGLE_BASE
+  static AlternateFormats* GetInstance() {
+    return Singleton<AlternateFormats>::get();
+  }
+#endif  // USE_GOOGLE_BASE
+
+  // Loads the metadata and indexes every entry by its country code. If
+  // loading fails, the failure is logged and the index is left empty.
+  AlternateFormats()
+      : format_data_(),
+        calling_code_to_alternate_formats_map_() {
+    const bool loaded = LoadAlternateFormats(&format_data_);
+    if (!loaded) {
+      LOG(DFATAL) << "Could not parse compiled-in metadata.";
+      return;
+    }
+    typedef RepeatedPtrField<PhoneMetadata>::const_iterator MetadataIterator;
+    for (MetadataIterator entry = format_data_.metadata().begin();
+         entry != format_data_.metadata().end();
+         ++entry) {
+      const PhoneMetadata& metadata = *entry;
+      calling_code_to_alternate_formats_map_.insert(
+          make_pair(metadata.country_code(), &metadata));
+    }
+  }
+
+  // Returns the metadata indexed under |country_calling_code|, or NULL when
+  // no entry exists for that calling code.
+  const PhoneMetadata* GetAlternateFormatsForCountry(int country_calling_code)
+      const {
+    typedef map<int, const PhoneMetadata*>::const_iterator FormatsIterator;
+    const FormatsIterator match =
+        calling_code_to_alternate_formats_map_.find(country_calling_code);
+    if (match == calling_code_to_alternate_formats_map_.end()) {
+      return NULL;
+    }
+    return match->second;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AlternateFormats);
+};
+
 PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
                                        const string& text,
                                        const string& region_code,
                                        PhoneNumberMatcher::Leniency leniency,
                                        int max_tries)
     : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
+      alternate_formats_(AlternateFormats::GetInstance()),
       phone_util_(util),
       text_(text),
       preferred_region_(region_code),
 PhoneNumberMatcher::PhoneNumberMatcher(const string& text,
                                        const string& region_code)
     : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
+      alternate_formats_(NULL),  // Not used.
       phone_util_(*PhoneNumberUtil::GetInstance()),
       text_(text),
       preferred_region_(region_code),
                    formatted_number_groups)) {
     return true;
   }
+  // If this didn't pass, see if there are any alternate formats, and try them
+  // instead.
+  const PhoneMetadata* alternate_formats =
+    alternate_formats_->GetAlternateFormatsForCountry(
+        phone_number.country_code());
+  if (alternate_formats) {
+    for (RepeatedPtrField<NumberFormat>::const_iterator it =
+             alternate_formats->number_format().begin();
+         it != alternate_formats->number_format().end(); ++it) {
+      formatted_number_groups.clear();
+      GetNationalNumberGroups(phone_number, &*it, &formatted_number_groups);
+      if (checker->Run(phone_util_, phone_number, normalized_candidate,
+                       formatted_number_groups)) {
+        return true;
+      }
+    }
+  }
   return false;
 }
 

cpp/src/phonenumbers/phonenumbermatcher.h

 using std::string;
 using std::vector;
 
+class AlternateFormats;
 class NumberFormat;
 class PhoneNumber;
 class PhoneNumberMatch;
   // Helper class holding useful regular expressions.
   const PhoneNumberMatcherRegExps* reg_exps_;
 
+  // Helper class holding loaded data containing alternate ways phone numbers
+  // might be formatted for certain regions.
+  const AlternateFormats* alternate_formats_;
+
   // The phone number utility.
   const PhoneNumberUtil& phone_util_;
 

cpp/test/phonenumbers/phonenumbermatcher_test.cc

       "\x2D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
   NumberTest("2012-0102 08", RegionCode::US()),  // Very strange formatting.
   NumberTest("2012-01-02 08", RegionCode::US()),
-  // Breakdown assistance number.
-  NumberTest("1800-10-10 22", RegionCode::AU()),
-};
+  // Breakdown assistance number with unexpected formatting.
+  NumberTest("1800-1-0-10 22", RegionCode::AU()),
+  NumberTest("030-3-2 23 12 34", RegionCode::DE()),
+  NumberTest("03 0 -3 2 23 12 34", RegionCode::DE()),
+  NumberTest("(0)3 0 -3 2 23 12 34", RegionCode::DE()),
+  NumberTest("0 3 0 -3 2 23 12 34", RegionCode::DE()),};
 
 // Strings with number-like things that should only be found up to and including
 // the "strict_grouping" leniency level.
   // Should be found by strict grouping but not exact grouping, as the last two
   // groups are formatted together as a block.
   NumberTest("0800-2491234", RegionCode::DE()),
+  // Doesn't match any formatting in the test file, but almost matches an
+  // alternate format (the last two groups have been squashed together here).
+  NumberTest("0900-1 123123", RegionCode::DE()),
+  NumberTest("(0)900-1 123123", RegionCode::DE()),
+  NumberTest("0 900-1 123123", RegionCode::DE()),
 };
 
 // Strings with number-like things that should be found at all levels.
   NumberTest("0494949 ext. 49", RegionCode::DE()),
   NumberTest("01 (33) 3461 2234", RegionCode::MX()),  // Optional NP present
   NumberTest("(33) 3461 2234", RegionCode::MX()),  // Optional NP omitted
+  // Breakdown assistance number with normal formatting.
+  NumberTest("1800-10-10 22", RegionCode::AU()),
+  // Doesn't match any formatting in the test file, but matches an alternate
+  // format exactly.
+  NumberTest("0900-1 123 123", RegionCode::DE()),
+  NumberTest("(0)900-1 123 123", RegionCode::DE()),
+  NumberTest("0 900-1 123 123", RegionCode::DE()),
 };
 
 TEST_F(PhoneNumberMatcherTest, MatchesWithPossibleLeniency) {
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.