Commits

Patrick Mézard committed 80b5cec Merge

Merge with r407

Comments (0)

Files changed (5)

cpp/src/phonenumbers/phonenumberutil.cc

 #include <unicode/uchar.h>
 #include <unicode/utf8.h>
 
+#include "base/basictypes.h"
 #include "base/logging.h"
 #include "base/memory/singleton.h"
 #include "phonenumbers/asyoutypeformatter.h"
 
 namespace {
 
-scoped_ptr<Logger> logger_;
-scoped_ptr<const AbstractRegExpFactory> regexp_factory;
-scoped_ptr<RegExpCache> regexp_cache;
-
-// These objects are created in the function InitializeStaticMapsAndSets.
-
-// A map that contains characters that are essential when dialling. That means
-// any of the characters in this map must not be removed from a number when
-// dialing, otherwise the call will not reach the intended destination.
-scoped_ptr<map<char32, char> > diallable_char_mappings;
-// These mappings map a character (key) to a specific digit that should replace
-// it for normalization purposes.
-scoped_ptr<map<char32, char> > alpha_mappings;
-// For performance reasons, store a map of combining alpha_mappings with ASCII
-// digits.
-scoped_ptr<map<char32, char> > alpha_phone_mappings;
-// Separate map of all symbols that we wish to retain when formatting alpha
-// numbers. This includes digits, ascii letters and number grouping symbols such
-// as "-" and " ".
-scoped_ptr<map<char32, char> > all_plus_number_grouping_symbols;
-
 // The prefix that needs to be inserted in front of a Colombian landline
 // number when dialed from a mobile phone in Colombia.
 const char kColombiaMobileToFixedLinePrefix[] = "3";
 // The kPlusSign signifies the international prefix.
 const char kPlusSign[] = "+";
 
-scoped_ptr<const RegExp> plus_chars_pattern;
-
 const char kRfc3966ExtnPrefix[] = ";ext=";
 
-// Pattern that makes it easy to distinguish whether a region has a unique
-// international dialing prefix or not. If a region has a unique international
-// prefix (e.g. 011 in USA), it will be represented as a string that contains a
-// sequence of ASCII digits. If there are multiple available international
-// prefixes in a region, they will be represented as a regex string that always
-// contains character(s) other than ASCII digits.
-// Note this regex also includes tilde, which signals waiting for the tone.
-scoped_ptr<const RegExp> unique_international_prefix;
-
 const char kDigits[] = "\\p{Nd}";
-scoped_ptr<const RegExp> digits_pattern;
 // We accept alpha characters in phone numbers, ASCII only. We store lower-case
 // here only since our regular expressions are case-insensitive.
 const char kValidAlpha[] = "a-z";
-scoped_ptr<const RegExp> capturing_digit_pattern;
-scoped_ptr<const RegExp> capturing_ascii_digits_pattern;
-
-// Regular expression of acceptable characters that may start a phone number
-// for the purposes of parsing. This allows us to strip away meaningless
-// prefixes to phone numbers that may be mistakenly given to us. This
-// consists of digits, the plus symbol and arabic-indic digits. This does
-// not contain alpha characters, although they may be used later in the
-// number. It also does not include other punctuation, as this will be
-// stripped later during parsing and is of no information value when parsing
-// a number. The string starting with this valid character is captured.
-// This corresponds to VALID_START_CHAR in the java version.
-scoped_ptr<const string> valid_start_char;
-scoped_ptr<const RegExp> valid_start_char_pattern;
-
-// Regular expression of valid characters before a marker that might indicate a
-// second number.
-scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern;
-
-// Regular expression of trailing characters that we want to remove. We remove
-// all characters that are not alpha or numerical characters. The hash
-// character is retained here, as it may signify the previous block was an
-// extension. Note the capturing block at the start to capture the rest of the
-// number if this was a match.
-// This corresponds to UNWANTED_END_CHARS in the java version.
-const char kUnwantedEndChar[] = "[^\\p{N}\\p{L}#]";
-scoped_ptr<const RegExp> unwanted_end_char_pattern;
-
-// Regular expression of groups of valid punctuation characters.
-scoped_ptr<const RegExp> separator_pattern;
-
-// Regular expression of viable phone numbers. This is location independent.
-// Checks we have at least three leading digits, and only valid punctuation,
-// alpha characters and digits in the phone number. Does not include extension
-// data. The symbol 'x' is allowed here as valid punctuation since it is often
-// used as a placeholder for carrier codes, for example in Brazilian phone
-// numbers. We also allow multiple plus-signs at the start.
-// Corresponds to the following:
-// plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
-scoped_ptr<const string> valid_phone_number;
 
 // Default extension prefix to use when formatting. This will be put in front of
 // any extension component of the number, after the main national number is
 // One-character symbols that can be used to indicate an extension.
 const char kSingleExtnSymbolsForMatching[] =
     "x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E";
-// Regexp of all possible ways to write extensions, for use when parsing. This
-// will be run as a case-insensitive regexp match. Wide character versions are
-// also provided after each ASCII version.
-scoped_ptr<const string> extn_patterns_for_parsing;
-scoped_ptr<const string> extn_patterns_for_matching;
-// Regexp of all known extension prefixes used by different regions followed
-// by 1 or more valid digits, for use when parsing.
-scoped_ptr<const RegExp> extn_pattern;
-
-// We append optionally the extension pattern to the end here, as a valid phone
-// number may have an extension prefix appended, followed by 1 or more digits.
-scoped_ptr<const RegExp> valid_phone_number_pattern;
-
-// We use this pattern to check if the phone number has at least three letters
-// in it - if so, then we treat it as a number where some phone-number digits
-// are represented by letters.
-scoped_ptr<const RegExp> valid_alpha_phone_pattern;
-
-scoped_ptr<const RegExp> first_group_capturing_pattern;
-
-scoped_ptr<const RegExp> carrier_code_pattern;
 
 bool LoadCompiledInMetadata(PhoneMetadataCollection* metadata) {
   if (!metadata->ParseFromArray(metadata_get(), metadata_size())) {
 }
 
 bool IsNumberMatchingDesc(const string& national_number,
-                          const PhoneNumberDesc& number_desc) {
+                          const PhoneNumberDesc& number_desc,
+                          RegExpCache* regexp_cache) {
   return regexp_cache->GetRegExp(number_desc.possible_number_pattern())
              .FullMatch(national_number) &&
          regexp_cache->GetRegExp(number_desc.national_number_pattern())
 }
 
 PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper(
-    const string& national_number, const PhoneMetadata& metadata) {
+    const string& national_number, const PhoneMetadata& metadata,
+    RegExpCache* regexp_cache) {
   const PhoneNumberDesc& general_desc = metadata.general_desc();
   if (!general_desc.has_national_number_pattern() ||
-      !IsNumberMatchingDesc(national_number, general_desc)) {
+      !IsNumberMatchingDesc(national_number, general_desc, regexp_cache)) {
     VLOG(4) << "Number type unknown - doesn't match general national number"
             << " pattern.";
     return PhoneNumberUtil::UNKNOWN;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.premium_rate())) {
+  if (IsNumberMatchingDesc(national_number, metadata.premium_rate(),
+                           regexp_cache)) {
     VLOG(4) << "Number is a premium number.";
     return PhoneNumberUtil::PREMIUM_RATE;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.toll_free())) {
+  if (IsNumberMatchingDesc(national_number, metadata.toll_free(),
+                           regexp_cache)) {
     VLOG(4) << "Number is a toll-free number.";
     return PhoneNumberUtil::TOLL_FREE;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.shared_cost())) {
+  if (IsNumberMatchingDesc(national_number, metadata.shared_cost(),
+                           regexp_cache)) {
     VLOG(4) << "Number is a shared cost number.";
     return PhoneNumberUtil::SHARED_COST;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.voip())) {
+  if (IsNumberMatchingDesc(national_number, metadata.voip(), regexp_cache)) {
     VLOG(4) << "Number is a VOIP (Voice over IP) number.";
     return PhoneNumberUtil::VOIP;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.personal_number())) {
+  if (IsNumberMatchingDesc(national_number, metadata.personal_number(),
+                           regexp_cache)) {
     VLOG(4) << "Number is a personal number.";
     return PhoneNumberUtil::PERSONAL_NUMBER;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.pager())) {
+  if (IsNumberMatchingDesc(national_number, metadata.pager(), regexp_cache)) {
     VLOG(4) << "Number is a pager number.";
     return PhoneNumberUtil::PAGER;
   }
-  if (IsNumberMatchingDesc(national_number, metadata.uan())) {
+  if (IsNumberMatchingDesc(national_number, metadata.uan(), regexp_cache)) {
     VLOG(4) << "Number is a UAN.";
     return PhoneNumberUtil::UAN;
   }
 
   bool is_fixed_line =
-      IsNumberMatchingDesc(national_number, metadata.fixed_line());
+      IsNumberMatchingDesc(national_number, metadata.fixed_line(),
+                           regexp_cache);
   if (is_fixed_line) {
     if (metadata.same_mobile_and_fixed_line_pattern()) {
       VLOG(4) << "Fixed-line and mobile patterns equal, number is fixed-line"
               << " or mobile";
       return PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
-    } else if (IsNumberMatchingDesc(national_number, metadata.mobile())) {
+    } else if (IsNumberMatchingDesc(national_number, metadata.mobile(),
+                                    regexp_cache)) {
       VLOG(4) << "Fixed-line and mobile patterns differ, but number is "
               << "still fixed-line or mobile";
       return PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
   // Otherwise, test to see if the number is mobile. Only do this if certain
   // that the patterns for mobile and fixed line aren't the same.
   if (!metadata.same_mobile_and_fixed_line_pattern() &&
-      IsNumberMatchingDesc(national_number, metadata.mobile())) {
+      IsNumberMatchingDesc(national_number, metadata.mobile(), regexp_cache)) {
     VLOG(4) << "Number is a mobile number.";
     return PhoneNumberUtil::MOBILE;
   }
   return codepoint;
 }
 
-void InitializeStaticMapsAndSets() {
-  // Create global objects.
-  regexp_factory.reset(new RegExpFactory());
-  regexp_cache.reset(new RegExpCache(*regexp_factory.get(), 128));
-  all_plus_number_grouping_symbols.reset(new map<char32, char>);
-  diallable_char_mappings.reset(new map<char32, char>);
-  alpha_mappings.reset(new map<char32, char>);
-  alpha_phone_mappings.reset(new map<char32, char>);
-
-
-  diallable_char_mappings->insert(make_pair('+', '+'));
-  diallable_char_mappings->insert(make_pair('*', '*'));
-  // Punctuation that we wish to respect in alpha numbers, as they show number
-  // groupings are mapped here.
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("-"), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D" /* "-" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x80\x90" /* "‐" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x80\x91" /* "‑" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x80\x92" /* "‒" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x80\x93" /* "–" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x80\x94" /* "—" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x80\x95" /* "―" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x88\x92" /* "−" */), '-'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("/"), '/'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F" /* "/" */), '/'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint(" "), ' '));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE3\x80\x80" /* " " */), ' '));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("."), '.'));
-  all_plus_number_grouping_symbols->insert(
-      make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E" /* "." */), '.'));
-  // Only the upper-case letters are added here - the lower-case versions are
-  // added programmatically.
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("B"), '2'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("C"), '2'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("D"), '3'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("E"), '3'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("F"), '3'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("G"), '4'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("H"), '4'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("I"), '4'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("J"), '5'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("K"), '5'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("L"), '5'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("M"), '6'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("N"), '6'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("O"), '6'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("P"), '7'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Q"), '7'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("R"), '7'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("S"), '7'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("T"), '8'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("U"), '8'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("V"), '8'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("W"), '9'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("X"), '9'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Y"), '9'));
-  alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Z"), '9'));
-  map<char32, char> lower_case_mappings;
-  map<char32, char> alpha_letters;
-  for (map<char32, char>::const_iterator it = alpha_mappings->begin();
-       it != alpha_mappings->end();
-       ++it) {
-    // Convert all the upper-case ASCII letters to lower-case.
-    if (it->first < 128) {
-      char letter_as_upper = static_cast<char>(it->first);
-      char32 letter_as_lower = static_cast<char32>(tolower(letter_as_upper));
-      lower_case_mappings.insert(make_pair(letter_as_lower, it->second));
-      // Add the letters in both variants to the alpha_letters map. This just
-      // pairs each letter with its upper-case representation so that it can be
-      // retained when normalising alpha numbers.
-      alpha_letters.insert(make_pair(letter_as_lower, letter_as_upper));
-      alpha_letters.insert(make_pair(it->first, letter_as_upper));
-    }
-  }
-  // In the Java version we don't insert the lower-case mappings in the map,
-  // because we convert to upper case on the fly. Doing this here would involve
-  // pulling in all of ICU, which we don't want to do if we don't have to.
-  alpha_mappings->insert(lower_case_mappings.begin(),
-                         lower_case_mappings.end());
-  alpha_phone_mappings->insert(alpha_mappings->begin(),
-                               alpha_mappings->end());
-  all_plus_number_grouping_symbols->insert(alpha_letters.begin(),
-                                           alpha_letters.end());
-  // Add the ASCII digits so that they don't get deleted by NormalizeHelper().
-  for (char c = '0'; c <= '9'; ++c) {
-    diallable_char_mappings->insert(make_pair(c, c));
-    alpha_phone_mappings->insert(make_pair(c, c));
-    all_plus_number_grouping_symbols->insert(make_pair(c, c));
-  }
-}
-
 // Helper initialiser method to create the regular-expression pattern to match
 // extensions, allowing the one-codepoint extension symbols provided by
 // single_extn_symbols.
   Logger::set_logger_impl(logger);
 }
 
+class PhoneNumberRegExpsAndMappings {  // Regexps and character maps, built once.
+ private:
+  void InitializeMapsAndSets() {  // Fills the four character maps declared below.
+    diallable_char_mappings_.insert(make_pair('+', '+'));
+    diallable_char_mappings_.insert(make_pair('*', '*'));
+    // Here we insert all punctuation symbols that we wish to respect when
+    // formatting alpha numbers, as they show the intended number groupings.
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("-"), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D" /* "-" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x80\x90" /* "‐" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x80\x91" /* "‑" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x80\x92" /* "‒" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x80\x93" /* "–" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x80\x94" /* "—" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x80\x95" /* "―" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x88\x92" /* "−" */), '-'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("/"), '/'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F" /* "/" */), '/'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint(" "), ' '));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE3\x80\x80" /* " " */), ' '));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("."), '.'));
+    all_plus_number_grouping_symbols_.insert(
+        make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E" /* "." */), '.'));
+    // Only the upper-case letters are added here - the lower-case versions are
+    // added programmatically by the loop that follows.
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("A"), '2'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("B"), '2'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("C"), '2'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("D"), '3'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("E"), '3'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("F"), '3'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("G"), '4'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("H"), '4'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("I"), '4'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("J"), '5'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("K"), '5'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("L"), '5'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("M"), '6'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("N"), '6'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("O"), '6'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("P"), '7'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Q"), '7'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("R"), '7'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("S"), '7'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("T"), '8'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("U"), '8'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("V"), '8'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("W"), '9'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("X"), '9'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Y"), '9'));
+    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Z"), '9'));
+    map<char32, char> lower_case_mappings;
+    map<char32, char> alpha_letters;
+    for (map<char32, char>::const_iterator it = alpha_mappings_.begin();
+         it != alpha_mappings_.end();
+         ++it) {
+      // Convert all the upper-case ASCII letters to lower-case.
+      if (it->first < 128) {
+        char letter_as_upper = static_cast<char>(it->first);
+        char32 letter_as_lower = static_cast<char32>(tolower(letter_as_upper));
+        lower_case_mappings.insert(make_pair(letter_as_lower, it->second));
+        // Add the letters in both variants to the alpha_letters map. This just
+        // pairs each letter with its upper-case representation so that it can
+        // be retained when normalising alpha numbers.
+        alpha_letters.insert(make_pair(letter_as_lower, letter_as_upper));
+        alpha_letters.insert(make_pair(it->first, letter_as_upper));
+      }
+    }
+    // In the Java version we don't insert the lower-case mappings in the map,
+    // because we convert to upper case on the fly. Doing this here would
+    // involve pulling in all of ICU, which we don't want to do if we don't have
+    // to.
+    alpha_mappings_.insert(lower_case_mappings.begin(),
+                           lower_case_mappings.end());
+    alpha_phone_mappings_.insert(alpha_mappings_.begin(),
+                                 alpha_mappings_.end());
+    all_plus_number_grouping_symbols_.insert(alpha_letters.begin(),
+                                             alpha_letters.end());
+    // Add the ASCII digits so that they don't get deleted by NormalizeHelper().
+    for (char c = '0'; c <= '9'; ++c) {
+      diallable_char_mappings_.insert(make_pair(c, c));
+      alpha_phone_mappings_.insert(make_pair(c, c));
+      all_plus_number_grouping_symbols_.insert(make_pair(c, c));
+    }
+  }
+
+  // Regular expression of viable phone numbers. This is location independent.
+  // Checks we have at least three leading digits, and only valid punctuation,
+  // alpha characters and digits in the phone number. Does not include extension
+  // data. The symbol 'x' is allowed here as valid punctuation since it is often
+  // used as a placeholder for carrier codes, for example in Brazilian phone
+  // numbers. We also allow multiple plus-signs at the start.
+  // Corresponds to the following:
+  // plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])*
+  const string valid_phone_number_;
+
+  // Regexp of all possible ways to write extensions, for use when parsing. This
+  // will be run as a case-insensitive regexp match. Wide character versions are
+  // also provided after each ASCII version.
+  // For parsing, we are slightly more lenient in our interpretation than for
+  // matching. Here we allow a "comma" as a possible extension indicator. When
+  // matching, this is hardly ever used to indicate this.
+  const string extn_patterns_for_parsing_;
+
+ public:
+  scoped_ptr<const AbstractRegExpFactory> regexp_factory_;
+  scoped_ptr<RegExpCache> regexp_cache_;
+
+  // A map that contains characters that are essential when dialling. That means
+  // any of the characters in this map must not be removed from a number when
+  // dialling, otherwise the call will not reach the intended destination.
+  map<char32, char> diallable_char_mappings_;
+  // These mappings map a character (key) to a specific digit that should
+  // replace it for normalization purposes.
+  map<char32, char> alpha_mappings_;
+  // For performance reasons, store a single map that combines alpha_mappings_
+  // with the ASCII digits.
+  map<char32, char> alpha_phone_mappings_;
+
+  // Separate map of all symbols that we wish to retain when formatting alpha
+  // numbers. This includes digits, ASCII letters and number grouping symbols
+  // such as "-" and " ".
+  map<char32, char> all_plus_number_grouping_symbols_;
+
+  // Pattern that makes it easy to distinguish whether a region has a unique
+  // international dialing prefix or not. If a region has a unique international
+  // prefix (e.g. 011 in USA), it will be represented as a string that contains
+  // a sequence of ASCII digits. If there are multiple available international
+  // prefixes in a region, they will be represented as a regex string that
+  // always contains character(s) other than ASCII digits.
+  // Note this regex also includes tilde, which signals waiting for the tone.
+  scoped_ptr<const RegExp> unique_international_prefix_;
+
+  scoped_ptr<const RegExp> digits_pattern_;
+  scoped_ptr<const RegExp> capturing_digit_pattern_;
+  scoped_ptr<const RegExp> capturing_ascii_digits_pattern_;
+
+  // Regular expression of acceptable characters that may start a phone number
+  // for the purposes of parsing. This allows us to strip away meaningless
+  // prefixes to phone numbers that may be mistakenly given to us. This consists
+  // of digits, the plus symbol and Arabic-Indic digits. This does not contain
+  // alpha characters, although they may be used later in the number. It also
+  // does not include other punctuation, as this will be stripped later during
+  // parsing and is of no information value when parsing a number. The string
+  // starting with this valid character is captured.
+  // This corresponds to VALID_START_CHAR in the Java version.
+  scoped_ptr<const RegExp> valid_start_char_pattern_;
+
+  // Regular expression of valid characters before a marker that might indicate
+  // a second number.
+  scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern_;
+
+  // Regular expression of trailing characters that we want to remove. We remove
+  // all characters that are not alpha or numerical characters. The hash
+  // character is retained here, as it may signify the previous block was an
+  // extension. The pattern is a single negated character class; it contains no
+  // capturing group.
+  // This corresponds to UNWANTED_END_CHAR_PATTERN in the Java version.
+  scoped_ptr<const RegExp> unwanted_end_char_pattern_;
+
+  // Regular expression of groups of valid punctuation characters.
+  scoped_ptr<const RegExp> separator_pattern_;
+
+  // Regexp of all possible ways to write extensions, for use when finding phone
+  // numbers in text. This will be run as a case-insensitive regexp match. Wide
+  // character versions are also provided after each ASCII version.
+  const string extn_patterns_for_matching_;
+
+  // Regexp of all known extension prefixes used by different regions followed
+  // by 1 or more valid digits, for use when parsing.
+  scoped_ptr<const RegExp> extn_pattern_;
+
+  // We optionally append the extension pattern to the end here, as a valid
+  // phone number may have an extension prefix appended, followed by 1 or more
+  // digits.
+  scoped_ptr<const RegExp> valid_phone_number_pattern_;
+
+  // We use this pattern to check if the phone number has at least three letters
+  // in it - if so, then we treat it as a number where some phone-number digits
+  // are represented by letters.
+  scoped_ptr<const RegExp> valid_alpha_phone_pattern_;
+
+  scoped_ptr<const RegExp> first_group_capturing_pattern_;
+
+  scoped_ptr<const RegExp> carrier_code_pattern_;
+
+  scoped_ptr<const RegExp> plus_chars_pattern_;
+
+  PhoneNumberRegExpsAndMappings()  // Initializers rely on declaration order.
+      : valid_phone_number_(
+            StrCat("[", PhoneNumberUtil::kPlusChars, "]*(?:[",
+                   PhoneNumberUtil::kValidPunctuation, "]*[",
+                   kDigits, "]){3,}[", kValidAlpha,
+                   PhoneNumberUtil::kValidPunctuation, kDigits, "]*")),
+        extn_patterns_for_parsing_(
+            CreateExtnPattern(StrCat(",", kSingleExtnSymbolsForMatching))),
+        regexp_factory_(new RegExpFactory()),
+        regexp_cache_(new RegExpCache(*regexp_factory_.get(), 128)),
+        diallable_char_mappings_(),
+        alpha_mappings_(),
+        alpha_phone_mappings_(),
+        all_plus_number_grouping_symbols_(),
+        unique_international_prefix_(regexp_factory_->CreateRegExp(
+            /* "[\\d]+(?:[~⁓∼~][\\d]+)?" */
+            "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")),
+        digits_pattern_(
+            regexp_factory_->CreateRegExp(StrCat("[", kDigits, "]*"))),
+        capturing_digit_pattern_(
+            regexp_factory_->CreateRegExp(StrCat("([", kDigits, "])"))),
+        capturing_ascii_digits_pattern_(
+            regexp_factory_->CreateRegExp("(\\d+)")),
+        valid_start_char_pattern_(regexp_factory_->CreateRegExp(
+            StrCat("[", PhoneNumberUtil::kPlusChars, kDigits, "]"))),
+        capture_up_to_second_number_start_pattern_(
+            regexp_factory_->CreateRegExp(
+                PhoneNumberUtil::kCaptureUpToSecondNumberStart)),
+        unwanted_end_char_pattern_(
+            regexp_factory_->CreateRegExp("[^\\p{N}\\p{L}#]")),
+        separator_pattern_(
+            regexp_factory_->CreateRegExp(
+                StrCat("[", PhoneNumberUtil::kValidPunctuation, "]+"))),
+        extn_patterns_for_matching_(
+            CreateExtnPattern(kSingleExtnSymbolsForMatching)),
+        extn_pattern_(regexp_factory_->CreateRegExp(
+            StrCat("(?i)(?:", extn_patterns_for_parsing_, ")$"))),
+        valid_phone_number_pattern_(regexp_factory_->CreateRegExp(
+            StrCat("(?i)", valid_phone_number_,
+                   "(?:", extn_patterns_for_parsing_, ")?"))),
+        valid_alpha_phone_pattern_(regexp_factory_->CreateRegExp(
+            StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))),
+        // The first_group_capturing_pattern was originally set to $1 but there
+        // are some countries for which the first group is not used in the
+        // national pattern (e.g. Argentina) so the $1 group does not match
+        // correctly. Therefore, we use \d, so that the first group actually
+        // used in the pattern will be matched.
+        first_group_capturing_pattern_(
+            regexp_factory_->CreateRegExp("(\\$\\d)")),
+        carrier_code_pattern_(regexp_factory_->CreateRegExp("\\$CC")),
+        plus_chars_pattern_(
+            regexp_factory_->CreateRegExp(
+                StrCat("[", PhoneNumberUtil::kPlusChars, "]+"))) {
+    InitializeMapsAndSets();  // Populate the character maps.
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(PhoneNumberRegExpsAndMappings);
+};
+
 // Private constructor. Also takes care of initialisation.
 //
 // The logger and the PhoneNumberRegExpsAndMappings helper are created in the
 // member initializer list, replacing the former logger_.reset() call and the
 // InitializeStaticMapsAndSets()/CreateRegularExpressions() calls in the body.
 PhoneNumberUtil::PhoneNumberUtil()
-    : country_calling_code_to_region_code_map_(new vector<IntRegionsPair>()),
+    : logger_(new StdoutLogger()),
+      reg_exps_(new PhoneNumberRegExpsAndMappings),
+      country_calling_code_to_region_code_map_(new vector<IntRegionsPair>()),
       nanpa_regions_(new set<string>()),
       region_to_metadata_map_(new map<string, PhoneMetadata>()) {
-  logger_.reset(new StdoutLogger());
   // Pass the logger built in the initializer list to Logger.
   Logger::set_logger_impl(logger_.get());
   PhoneMetadataCollection metadata_collection;
   if (!LoadCompiledInMetadata(&metadata_collection)) {
   sort(country_calling_code_to_region_code_map_->begin(),
        country_calling_code_to_region_code_map_->end(),
        OrderByFirst());
-
-  InitializeStaticMapsAndSets();
-  CreateRegularExpressions();
 }
 
 // Destructor. The body is empty; no explicit cleanup is performed here.
 PhoneNumberUtil::~PhoneNumberUtil() {
 }
 #endif
 
-void PhoneNumberUtil::CreateRegularExpressions() const {
-  unique_international_prefix.reset(regexp_factory->CreateRegExp(
-     /* "[\\d]+(?:[~⁓∼～][\\d]+)?" */
-      "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));
-  // The first_group_capturing_pattern was originally set to $1 but there are
-  // some countries for which the first group is not used in the national
-  // pattern (e.g. Argentina) so the $1 group does not match correctly.
-  // Therefore, we use \d, so that the first group actually used in the pattern
-  // will be matched.
-  first_group_capturing_pattern.reset(regexp_factory->CreateRegExp("(\\$\\d)"));
-  carrier_code_pattern.reset(regexp_factory->CreateRegExp("\\$CC"));
-  digits_pattern.reset(
-      regexp_factory->CreateRegExp(StrCat("[", kDigits, "]*")));
-  capturing_digit_pattern.reset(
-      regexp_factory->CreateRegExp(StrCat("([", kDigits, "])")));
-  capturing_ascii_digits_pattern.reset(regexp_factory->CreateRegExp("(\\d+)"));
-  valid_start_char.reset(new string(StrCat("[", kPlusChars, kDigits, "]")));
-  valid_start_char_pattern.reset(
-      regexp_factory->CreateRegExp(*valid_start_char));
-  capture_up_to_second_number_start_pattern.reset(regexp_factory->CreateRegExp(
-      kCaptureUpToSecondNumberStart));
-  unwanted_end_char_pattern.reset(
-      regexp_factory->CreateRegExp(kUnwantedEndChar));
-  separator_pattern.reset(
-      regexp_factory->CreateRegExp(StrCat("[", kValidPunctuation, "]+")));
-  valid_phone_number.reset(new string(
-      StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kDigits,
-             "]){3,}[", kValidAlpha, kValidPunctuation, kDigits, "]*")));
-  // For parsing, we are slightly more lenient in our interpretation than for
-  // matching. Here we allow a "comma" as a possible extension indicator. When
-  // matching, this is hardly ever used to indicate this.
-  const string single_extn_symbols_for_parsing =
-      StrCat(",", kSingleExtnSymbolsForMatching);
-  extn_patterns_for_parsing.reset(
-      new string(CreateExtnPattern(single_extn_symbols_for_parsing)));
-  extn_patterns_for_matching.reset(
-      new string(CreateExtnPattern(kSingleExtnSymbolsForMatching)));
-
-  extn_pattern.reset(regexp_factory->CreateRegExp(
-      StrCat("(?i)(?:", *extn_patterns_for_parsing, ")$")));
-  valid_phone_number_pattern.reset(regexp_factory->CreateRegExp(
-      StrCat("(?i)", *valid_phone_number,
-             "(?:", *extn_patterns_for_parsing, ")?")));
-  valid_alpha_phone_pattern.reset(regexp_factory->CreateRegExp(
-      StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}")));
-  plus_chars_pattern.reset(
-      regexp_factory->CreateRegExp(StrCat("[", kPlusChars, "]+")));
-}
-
 // Returns a reference to the extension-pattern string used for matching. After
 // this change the string is a member of the reg_exps_ helper object.
 const string& PhoneNumberUtil::GetExtnPatternsForMatching() const {
-  return *(extn_patterns_for_matching.get());
+  return reg_exps_->extn_patterns_for_matching_;
 }
 
 // Returns the result of running FullMatch on s with the digits pattern, which
 // is now reached through the reg_exps_ helper object.
 bool PhoneNumberUtil::ContainsOnlyValidDigits(const string& s) const {
-  return digits_pattern->FullMatch(s);
+  return reg_exps_->digits_pattern_->FullMatch(s);
 }
 
 void PhoneNumberUtil::TrimUnwantedEndChars(string* number) const {
   for (; reverse_it.base() != number_as_unicode.begin(); ++reverse_it) {
     len = reverse_it.get_utf8(current_char);
     current_char[len] = '\0';
-    if (!unwanted_end_char_pattern->FullMatch(current_char)) {
+    if (!reg_exps_->unwanted_end_char_pattern_->FullMatch(current_char)) {
       break;
     }
   }
   // followed by a single digit, separated by valid phone number punctuation.
   // This prevents invalid punctuation (such as the star sign in Israeli star
   // numbers) getting into the output of the AYTF.
-  const RegExp& eligible_format_pattern = regexp_cache->GetRegExp(
+  const RegExp& eligible_format_pattern = reg_exps_->regexp_cache_->GetRegExp(
       StrCat("[", kValidPunctuation, "]*", "(\\$\\d", "[",
              kValidPunctuation, "]*)+"));
   return eligible_format_pattern.FullMatch(format);
     formatted_number->assign("");
   }
   if (!with_formatting) {
-    NormalizeHelper(*diallable_char_mappings, true, /* remove non matches */
-                    formatted_number);
+    NormalizeHelper(reg_exps_->diallable_char_mappings_,
+                    true /* remove non matches */, formatted_number);
   }
 }
 
   // format of the number is returned, unless there is a preferred international
   // prefix.
   const string international_prefix_for_formatting(
-      unique_international_prefix->FullMatch(international_prefix)
+      reg_exps_->unique_international_prefix_->FullMatch(international_prefix)
       ? international_prefix
       : metadata->preferred_international_prefix());
   if (!international_prefix_for_formatting.empty()) {
   // this by comparing the number in raw_input with the parsed number.
   string raw_input_copy(number.raw_input());
   // Normalize punctuation. We retain number grouping symbols such as " " only.
-  NormalizeHelper(*all_plus_number_grouping_symbols, true, &raw_input_copy);
+  NormalizeHelper(reg_exps_->all_plus_number_grouping_symbols_, true,
+                  &raw_input_copy);
   // Now we trim everything before the first three digits in the parsed number.
   // We choose three because all valid alpha numbers have 3 digits at the start
   // - if it does not, then we don't trim anything at all. Similarly, if the
   // format of the number is returned, unless there is a preferred international
   // prefix.
   const string international_prefix_for_formatting(
-      unique_international_prefix->FullMatch(international_prefix)
+      reg_exps_->unique_international_prefix_->FullMatch(international_prefix)
       ? international_prefix
       : metadata->preferred_international_prefix());
   if (!international_prefix_for_formatting.empty()) {
     int size = it->leading_digits_pattern_size();
     if (size > 0) {
       const scoped_ptr<RegExpInput> number_copy(
-          regexp_factory->CreateInput(number_for_leading_digits_match));
+          reg_exps_->regexp_factory_->CreateInput(
+              number_for_leading_digits_match));
       // We always use the last leading_digits_pattern, as it is the most
       // detailed.
-      if (!regexp_cache->GetRegExp(it->leading_digits_pattern(size - 1))
-              .Consume(number_copy.get())) {
+      if (!reg_exps_->regexp_cache_->GetRegExp(
+              it->leading_digits_pattern(size - 1)).Consume(
+                  number_copy.get())) {
         continue;
       }
     }
-    const RegExp& pattern_to_match(regexp_cache->GetRegExp(it->pattern()));
+    const RegExp& pattern_to_match(
+        reg_exps_->regexp_cache_->GetRegExp(it->pattern()));
     if (pattern_to_match.FullMatch(national_number)) {
       return &(*it);
     }
     // Replace the $CC in the formatting rule with the desired carrier code.
     string carrier_code_formatting_rule =
         format->domestic_carrier_code_formatting_rule();
-    carrier_code_pattern->Replace(&carrier_code_formatting_rule,
-                                  carrier_code);
-    first_group_capturing_pattern->Replace(&formatting_pattern,
-                                           carrier_code_formatting_rule);
+    reg_exps_->carrier_code_pattern_->Replace(&carrier_code_formatting_rule,
+                                              carrier_code);
+    reg_exps_->first_group_capturing_pattern_->Replace(&formatting_pattern,
+                                               carrier_code_formatting_rule);
   } else {
     // Use the national prefix formatting rule instead.
     string national_prefix_formatting_rule =
       // Apply the national_prefix_formatting_rule as the formatting_pattern
       // contains only information on how the national significant number
       // should be formatted at this point.
-      first_group_capturing_pattern->Replace(
+      reg_exps_->first_group_capturing_pattern_->Replace(
           &formatting_pattern, national_prefix_formatting_rule);
     }
   }
   formatted_number->assign(national_number);
-  const RegExp& pattern_to_match(regexp_cache->GetRegExp(format->pattern()));
+  const RegExp& pattern_to_match(
+      reg_exps_->regexp_cache_->GetRegExp(format->pattern()));
   pattern_to_match.GlobalReplace(formatted_number, formatting_pattern);
 }
 
                                       number, carrier_code, formatted_number);
   if (number_format == RFC3966) {
     // Replace all separators with a "-".
-    separator_pattern->GlobalReplace(formatted_number, "-");
+    reg_exps_->separator_pattern_->GlobalReplace(formatted_number, "-");
   }
 }
 
     const PhoneMetadata* metadata = GetMetadataForRegion(*it);
     if (metadata->has_leading_digits()) {
       const scoped_ptr<RegExpInput> number(
-          regexp_factory->CreateInput(national_number));
-      if (regexp_cache->GetRegExp(metadata->leading_digits()).Consume(
-          number.get())) {
+          reg_exps_->regexp_factory_->CreateInput(national_number));
+      if (reg_exps_->regexp_cache_->
+              GetRegExp(metadata->leading_digits()).Consume(number.get())) {
         *region_code = *it;
         return;
       }
-    } else if (GetNumberTypeHelper(national_number, *metadata) != UNKNOWN) {
+    } else if (GetNumberTypeHelper(national_number, *metadata,
+                                   reg_exps_->regexp_cache_.get()) != UNKNOWN) {
       *region_code = *it;
       return;
     }
     const string& default_region) const {
   if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) {
     const scoped_ptr<RegExpInput> number(
-        regexp_factory->CreateInput(number_to_parse));
-    if (!plus_chars_pattern->Consume(number.get())) {
+        reg_exps_->regexp_factory_->CreateInput(number_to_parse));
+    if (!reg_exps_->plus_chars_pattern_->Consume(number.get())) {
       return false;
     }
   }
                               &normalized_national_number, &temp_number);
   if (country_code_error != NO_PARSING_ERROR) {
      const scoped_ptr<RegExpInput> number_string_piece(
-        regexp_factory->CreateInput(national_number));
+        reg_exps_->regexp_factory_->CreateInput(national_number));
     if ((country_code_error == INVALID_COUNTRY_CODE_ERROR) &&
-        (plus_chars_pattern->Consume(number_string_piece.get()))) {
+        (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get()))) {
       normalized_national_number.assign(number_string_piece->ToString());
       // Strip the plus-char, and try again.
       MaybeExtractCountryCode(country_metadata,
   for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) {
     len = it.get_utf8(current_char);
     current_char[len] = '\0';
-    if (valid_start_char_pattern->FullMatch(current_char)) {
+    if (reg_exps_->valid_start_char_pattern_->FullMatch(current_char)) {
       break;
     }
   }
           << *extracted_number;
 
   // Now remove any extra numbers at the end.
-  capture_up_to_second_number_start_pattern->PartialMatch(*extracted_number,
-                                                          extracted_number);
+  reg_exps_->capture_up_to_second_number_start_pattern_->
+      PartialMatch(*extracted_number, extracted_number);
 }
 
 bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const {
       return IS_POSSIBLE;
     }
   }
-  const RegExp& possible_number_pattern = regexp_cache->GetRegExp(
+  const RegExp& possible_number_pattern = reg_exps_->regexp_cache_->GetRegExp(
       StrCat("(", general_num_desc.possible_number_pattern(), ")"));
   return TestNumberLengthAgainstPattern(possible_number_pattern,
                                         national_number);
   string national_significant_number;
   GetNationalSignificantNumber(number, &national_significant_number);
   return GetNumberTypeHelper(national_significant_number,
-                             *GetMetadataForRegion(region_code));
+                             *GetMetadataForRegion(region_code),
+                             reg_exps_->regexp_cache_.get());
 }
 
 bool PhoneNumberUtil::IsValidNumber(const PhoneNumber& number) const {
     return number_length > kMinLengthForNsn &&
         number_length <= kMaxLengthForNsn;
   }
-  return GetNumberTypeHelper(national_number, *metadata) != UNKNOWN;
+  return GetNumberTypeHelper(national_number, *metadata,
+                             reg_exps_->regexp_cache_.get()) != UNKNOWN;
 }
 
 bool PhoneNumberUtil::IsLeadingZeroPossible(int country_calling_code) const {
   string national_significant_number;
   GetNationalSignificantNumber(number, &national_significant_number);
   PhoneNumberType type = GetNumberTypeHelper(national_significant_number,
-                                             *metadata);
+                                             *metadata,
+                                             reg_exps_->regexp_cache_.get());
   // Most numbers other than the two types below have to be dialled in full.
   if (type != FIXED_LINE && type != FIXED_LINE_OR_MOBILE) {
     return 0;
   string formatted_number;
   Format(copied_proto, INTERNATIONAL, &formatted_number);
   const scoped_ptr<RegExpInput> i18n_number(
-      regexp_factory->CreateInput(formatted_number));
+      reg_exps_->regexp_factory_->CreateInput(formatted_number));
   string digit_group;
   string ndc;
   string third_group;
   for (int i = 0; i < 3; ++i) {
-    if (!capturing_ascii_digits_pattern->FindAndConsume(i18n_number.get(),
-                                                        &digit_group)) {
+    if (!reg_exps_->capturing_ascii_digits_pattern_->FindAndConsume(
+            i18n_number.get(), &digit_group)) {
       // We should find at least three groups.
       return 0;
     }
 
 void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const {
   DCHECK(number);
-  const RegExp& non_digits_pattern = regexp_cache->GetRegExp(
+  const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp(
       StrCat("[^", kDigits, "]"));
   // Delete everything that isn't valid digits.
   non_digits_pattern.GlobalReplace(number, "");
   string number_copy(number);
   string extension;
   MaybeStripExtension(&number_copy, &extension);
-  return valid_alpha_phone_pattern->FullMatch(number_copy);
+  return reg_exps_->valid_alpha_phone_pattern_->FullMatch(number_copy);
 }
 
 // Rewrites *number in place by passing it through NormalizeHelper with the
 // alpha-phone mappings. The second argument is false, i.e. the "remove non
 // matches" flag of NormalizeHelper is not set.
 void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const {
   DCHECK(number);
-  NormalizeHelper(*alpha_phone_mappings, false, number);
+  NormalizeHelper(reg_exps_->alpha_phone_mappings_, false, number);
 }
 
 // Normalizes a string of characters representing a phone number. This performs
 //   - Spurious alpha characters are stripped.
 void PhoneNumberUtil::Normalize(string* number) const {
   DCHECK(number);
   // When the alpha-phone pattern is found in the number, apply the alpha-phone
   // mappings first; the second argument (true) asks NormalizeHelper to remove
   // non-matches.
-  if (valid_alpha_phone_pattern->PartialMatch(*number)) {
-    NormalizeHelper(*alpha_phone_mappings, true, number);
+  if (reg_exps_->valid_alpha_phone_pattern_->PartialMatch(*number)) {
+    NormalizeHelper(reg_exps_->alpha_phone_mappings_, true, number);
   }
   // NormalizeDigitsOnly runs on the number in both cases.
   NormalizeDigitsOnly(number);
 }
     VLOG(2) << "Number too short to be viable:" << number;
     return false;
   }
-  return valid_phone_number_pattern->FullMatch(number);
+  return reg_exps_->valid_phone_number_pattern_->FullMatch(number);
 }
 
 // Strips the IDD from the start of the number if present. Helper function used
                                        string* number) const {
   DCHECK(number);
   const scoped_ptr<RegExpInput> number_copy(
-      regexp_factory->CreateInput(*number));
+      reg_exps_->regexp_factory_->CreateInput(*number));
   // First attempt to strip the idd_pattern at the start, if present. We make a
   // copy so that we can revert to the original string if necessary.
   if (idd_pattern.Consume(number_copy.get())) {
     // Only strip this if the first digit after the match is not a 0, since
     // country calling codes cannot begin with 0.
     string extracted_digit;
-    if (capturing_digit_pattern->PartialMatch(number_copy->ToString(),
-                                              &extracted_digit)) {
+    if (reg_exps_->capturing_digit_pattern_->PartialMatch(
+            number_copy->ToString(), &extracted_digit)) {
       NormalizeDigitsOnly(&extracted_digit);
       if (extracted_digit == "0") {
         return false;
     return PhoneNumber::FROM_DEFAULT_COUNTRY;
   }
   const scoped_ptr<RegExpInput> number_string_piece(
-      regexp_factory->CreateInput(*number));
-  if (plus_chars_pattern->Consume(number_string_piece.get())) {
+      reg_exps_->regexp_factory_->CreateInput(*number));
+  if (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get())) {
     number->assign(number_string_piece->ToString());
     // Can now normalize the rest of the number since we've consumed the "+"
     // sign at the start.
     return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN;
   }
   // Attempt to parse the first digits as an international prefix.
-  const RegExp& idd_pattern = regexp_cache->GetRegExp(possible_idd_prefix);
+  const RegExp& idd_pattern =
+      reg_exps_->regexp_cache_->GetRegExp(possible_idd_prefix);
   Normalize(number);
   return ParsePrefixAsIdd(idd_pattern, number)
       ? PhoneNumber::FROM_NUMBER_WITH_IDD
   // We use two copies here since Consume modifies the phone number, and if the
   // first if-clause fails the number will already be changed.
   const scoped_ptr<RegExpInput> number_copy(
-      regexp_factory->CreateInput(*number));
+      reg_exps_->regexp_factory_->CreateInput(*number));
   const scoped_ptr<RegExpInput> number_copy_without_transform(
-      regexp_factory->CreateInput(*number));
+      reg_exps_->regexp_factory_->CreateInput(*number));
   string number_string_copy(*number);
   string captured_part_of_prefix;
-  const RegExp& national_number_rule = regexp_cache->GetRegExp(
+  const RegExp& national_number_rule = reg_exps_->regexp_cache_->GetRegExp(
       metadata.general_desc().national_number_pattern());
   // Check if the original number is viable.
   bool is_viable_original_number = national_number_rule.FullMatch(*number);
   // copy so that we can revert to the original string if necessary.
   const string& transform_rule = metadata.national_prefix_transform_rule();
   const RegExp& possible_national_prefix_pattern =
-      regexp_cache->GetRegExp(possible_national_prefix);
+      reg_exps_->regexp_cache_->GetRegExp(possible_national_prefix);
   if (!transform_rule.empty() &&
       (possible_national_prefix_pattern.Consume(
           number_copy.get(), &carrier_code_temp, &captured_part_of_prefix) ||
   string possible_extension_three;
   string number_copy(*number);
   const scoped_ptr<RegExpInput> number_copy_as_regexp_input(
-      regexp_factory->CreateInput(number_copy));
-  if (extn_pattern->Consume(number_copy_as_regexp_input.get(),
+      reg_exps_->regexp_factory_->CreateInput(number_copy));
+  if (reg_exps_->extn_pattern_->Consume(number_copy_as_regexp_input.get(),
                             false,
                             &possible_extension_one,
                             &possible_extension_two,
                             &possible_extension_three)) {
     // Replace the extensions in the original string here.
-    extn_pattern->Replace(&number_copy, "");
+    reg_exps_->extn_pattern_->Replace(&number_copy, "");
     VLOG(4) << "Found an extension. Possible extension one: "
             << possible_extension_one
             << ". Possible extension two: " << possible_extension_two
       const PhoneNumberDesc& general_num_desc =
           default_region_metadata->general_desc();
       const RegExp& valid_number_pattern =
-          regexp_cache->GetRegExp(general_num_desc.national_number_pattern());
+          reg_exps_->regexp_cache_->GetRegExp(
+              general_num_desc.national_number_pattern());
       MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata,
                                              &potential_national_number,
                                              NULL);
       VLOG(4) << "Number without country calling code prefix: "
               << potential_national_number;
-      const RegExp& possible_number_pattern = regexp_cache->GetRegExp(
-          StrCat("(", general_num_desc.possible_number_pattern(), ")"));
+      const RegExp& possible_number_pattern =
+          reg_exps_->regexp_cache_->GetRegExp(
+              StrCat("(", general_num_desc.possible_number_pattern(), ")"));
       // If the number was not valid before but is valid now, or if it was too
       // long before, we consider the number with the country code stripped to
       // be a better result and keep that instead.
   }
   const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
   return !IsNumberMatchingDesc(
-      national_significant_number, metadata->no_international_dialling());
+      national_significant_number, metadata->no_international_dialling(),
+      reg_exps_->regexp_cache_.get());
 }
 
 }  // namespace phonenumbers

cpp/src/phonenumbers/phonenumberutil.h

 class Logger;
 class NumberFormat;
 class PhoneMetadata;
-class PhoneMetadataCollection;
-class PhoneNumber;
+class PhoneNumberMatcherRegExps;
+class PhoneNumberRegExpsAndMappings;
 class RegExp;
 
 // NOTE: A lot of methods in this class require Region Code strings. These must
   friend class PhoneNumberMatcher;
   friend class PhoneNumberMatcherRegExps;
   friend class PhoneNumberMatcherTest;
+  friend class PhoneNumberRegExpsAndMappings;
   friend class PhoneNumberUtilTest;
  public:
   ~PhoneNumberUtil();
   static PhoneNumberUtil* GetInstance();
 #endif
 
-  // Initialisation helper function used to populate the regular expressions in
-  // a defined order.
-  void CreateRegularExpressions() const;
-
   // Returns true if the number is a valid vanity (alpha) number such as 800
   // MICROSOFT. A valid vanity number will start with at least 3 digits and will
   // have three or more alpha characters. This does not do region-specific
   bool IsLeadingZeroPossible(int country_calling_code) const;
 
  private:
+  scoped_ptr<Logger> logger_;
+
   typedef pair<int, list<string>*> IntRegionsPair;
 
   // The minimum and maximum length of the national significant number.
   // This corresponds to SECOND_NUMBER_START in the java version.
   static const char kCaptureUpToSecondNumberStart[];
 
+  // Helper class holding useful regular expressions and character mappings.
+  scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
+
   // A mapping from a country calling code to a RegionCode object which denotes
   // the region represented by that country calling code. Note regions under
   // NANPA share the country calling code 1 and Russia and Kazakhstan share the

cpp/src/phonenumbers/region_code.h

 class RegionCode {
  public:
   // Returns a region code string representing the "unknown" region.
   // After this change the value is returned as a string literal (const char*)
   // rather than as a reference to a function-local static string.
-  static const string& GetUnknown() {
-    static const string s = "ZZ";
-    return s;
+  static const char* GetUnknown() {
+    return "ZZ";
   }
 };
 

cpp/test/phonenumbers/run_tests.cc

+// Copyright (C) 2011 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <gtest/gtest.h>
 
 int main(int argc, char** argv) {

cpp/test/phonenumbers/test_util.h

 
 ostream& operator<<(ostream& os, const vector<PhoneNumber>& numbers);
 
-// Class containing string constants of region codes for easier testing.
+// Class containing string constants of region codes for easier testing. Note
+// that another private RegionCode class is defined in
+// cpp/src/phonenumbers/region_code.h. This one contains more constants.
 class RegionCode {
  public:
   // Each accessor below is named after a two-letter region code and returns
   // that code. After this change the accessors return a string literal
   // (const char*) instead of a reference to a function-local static string.
-  static const string& AD() {
-    static const string s = "AD";
-    return s;
+  static const char* AD() {
+    return "AD";
   }
 
-  static const string& AO() {
-    static const string s = "AO";
-    return s;
+  static const char* AO() {
+    return "AO";
   }
 
-  static const string& AQ() {
-    static const string s = "AQ";
-    return s;
+  static const char* AQ() {
+    return "AQ";
   }
 
-  static const string& AR() {
-    static const string s = "AR";
-    return s;
+  static const char* AR() {
+    return "AR";
   }
 
-  static const string& AU() {
-    static const string s = "AU";
-    return s;
+  static const char* AU() {
+    return "AU";
   }
 
-  static const string& BS() {
-    static const string s = "BS";
-    return s;
+  static const char* BS() {
+    return "BS";
   }
 
-  static const string& CA() {
-    static const string s = "CA";
-    return s;
+  static const char* CA() {
+    return "CA";
   }
 
-  static const string& CN() {
-    static const string s = "CN";
-    return s;
+  static const char* CN() {
+    return "CN";
   }
 
-  static const string& CS() {
-    static const string s = "CS";
-    return s;
+  static const char* CS() {
+    return "CS";
   }
 
-  static const string& DE() {
-    static const string s = "DE";
-    return s;
+  static const char* DE() {
+    return "DE";
   }
 
-  static const string& GB() {
-    static const string s = "GB";
-    return s;
+  static const char* GB() {
+    return "GB";
   }
 
-  static const string& IT() {
-    static const string s = "IT";
-    return s;
+  static const char* IT() {
+    return "IT";
   }
 
-  static const string& JP() {
-    static const string s = "JP";
-    return s;
+  static const char* JP() {
+    return "JP";
   }
 
-  static const string& KR() {
-    static const string s = "KR";
-    return s;
+  static const char* KR() {
+    return "KR";
   }
 
-  static const string& MX() {
-    static const string s = "MX";
-    return s;
+  static const char* MX() {
+    return "MX";
   }
 
-  static const string& NZ() {
-    static const string s = "NZ";
-    return s;
+  static const char* NZ() {
+    return "NZ";
   }
 
-  static const string& PL() {
-    static const string s = "PL";
-    return s;
+  static const char* PL() {
+    return "PL";
   }
 
-  static const string& RE() {
-    static const string s = "RE";
-    return s;
+  static const char* RE() {
+    return "RE";
   }
 
-  static const string& SG() {
-    static const string s = "SG";
-    return s;
+  static const char* SG() {
+    return "SG";
   }
 
-  static const string& US() {
-    static const string s = "US";
-    return s;
+  static const char* US() {
+    return "US";
   }
 
-  static const string& YT() {
-    static const string s = "YT";
-    return s;
+  static const char* YT() {
+    return "YT";
   }
 
   // Returns a region code string representing the "unknown" region.
-  static const string& GetUnknown() {
-    static const string s = "ZZ";
-    return s;
+  static const char* GetUnknown() {
+    return "ZZ";
   }
 
   // Alias for GetUnknown(): forwards to it and returns the same value.
-  static const string& ZZ() {
+  static const char* ZZ() {
     return GetUnknown();
   }
 };