// bllip-parser/second-stage/programs/eval-weights/sym.cc

// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.  You may obtain
// a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
// License for the specific language governing permissions and limitations
// under the License.

// sym.cc
//
// (c) Mark Johnson, 10th March 2001
// (c) Mark Johnson, 12th December 2001 (fix static initialization order bug)
// (c) Mark Johnson, 4th May 2002 (write/read invariance)
// (c) Mark Johnson, 16th July 2002 (g++ 3.1 namespace compatibility)
// (c) Mark Johnson, 15th August 2002 (added test code)
// (c) Mark Johnson, 20th August 2002 (fixed EOF bug)

// #define MAIN   // uncomment this to include the main() test program below

#include "custom_allocator.h"       // must be first

#include "sym.h"
#include <cctype>

#define ESCAPE     '\\'
#define OPENQUOTE  '\"'
#define CLOSEQUOTE '\"'
#define UNDEFINED  "%UNDEFINED%"           // UNDEFINED must start with punctuation

// The symbol table is held in a function-local static instead of a
// namespace-scope object, so it is constructed the first time table() is
// called.  This avoids static initialization order bugs.
//
symbol::Table& symbol::table()
{
  static Table the_table(65536);   // 65536 is the default table size
  return the_table;
}

// Constructs the symbol for s: s is inserted into the shared table and sp is
// set to the address of the table's element for that string.
//
symbol::symbol(const std::string& s)
  : sp(&*table().insert(s).first)
{
}

// Constructs a symbol from a C string.  A null cp gives a symbol whose sp is
// NULL; otherwise the string is inserted into the shared table and sp is set
// to the address of the table's element for it.
//
symbol::symbol(const char* cp)
{
  if (cp == NULL) {
    sp = NULL;
    return;
  }
  const std::string key(cp);
  sp = &*table().insert(key).first;
}


// Read/write code

// dont_escape() returns true iff c may appear unescaped in an unquoted symbol:
// it must be a graphic (printing, non-space) character and must not be '%'
// (the first character of the UNDEFINED token), '(' or ')', the escape
// character, or either quote character.
//
// c is converted to unsigned char before it is passed to isgraph(): calling
// the <cctype> classification functions with a negative value other than EOF
// is undefined behaviour, which is what a non-ASCII byte would produce on
// platforms where plain char is signed.
//
inline static bool dont_escape(char c) { 
  return isgraph(static_cast<unsigned char>(c)) && c != '%' && c != '(' && c != ')'
    && c != ESCAPE && c != OPENQUOTE && c != CLOSEQUOTE;
}

// escaped_char() maps the character that follows an escape character to the
// character it denotes.  The letters a, b, f, n, r, t and v denote the same
// control characters as the corresponding C escape sequences; every other
// character denotes itself.
//
inline static char escaped_char(char c) {
  // letters[i] is the escape letter whose meaning is controls[i]
  static const char letters[]  = "abfnrtv";
  static const char controls[] = "\a\b\f\n\r\t\v";
  for (int i = 0; letters[i] != '\0'; ++i)
    if (letters[i] == c)
      return controls[i];
  return c;
}

// operator>> reads a symbol from is into s.
//
// The first character obtained by formatted extraction (so leading whitespace
// is skipped when the stream's skipws flag is set) selects one of three forms:
//
//   * an unquoted symbol: a run of characters accepted by dont_escape(),
//     where any character may also be written as ESCAPE followed by that
//     character (decoded with escaped_char());
//   * a quoted string: everything between OPENQUOTE and CLOSEQUOTE, with the
//     same escape convention;
//   * the UNDEFINED token, which yields symbol::undefined().
//
// Any other first character is put back and failbit is set.  On every early
// return the stream is left in a failed state and s is not modified.
//
std::istream& operator>> (std::istream& is, symbol& s)
{
  std::string str;                     // decoded characters of the symbol read so far
  char c;
  if (!(is >> c)) return is;           // If read fails, return error
  if (dont_escape(c) || c == ESCAPE) { // Recognize a normal (unquoted) symbol
    do {
      if (c == ESCAPE) {
	if (!is.get(c)) return is;     //  Read the escaped character; return if read fails.
	str.push_back(escaped_char(c));//  Push the decoded character onto string.
      }
      else
	str.push_back(c);              //  Ordinary character; keep it as is.
    }
    while (is.get(c) && (dont_escape(c) || c == ESCAPE));
    // The loop ends either because a character that cannot continue the symbol
    // was read, or because get() failed.
    if (!is.fail())                    //  Did we read one too many chars?
      is.putback(c);                   //   Yes.  Put it back.
    else if (is.eof())                 //  Did get() fail because we hit eof?
      // Yes.  The symbol itself was read successfully, so clear failbit and
      // eofbit (leaving any other state bits untouched).
      is.clear(is.rdstate() & ~std::ios::failbit & ~std::ios::eofbit);
    s = symbol(str);                   //  Load string into symbol
  }
  else if (c == OPENQUOTE) {           // Recognize a quoted string
    if (!is.get(c)) return is;         //  Read next character; return if read fails
    while (c != CLOSEQUOTE) {          //  An unescaped CLOSEQUOTE ends the string
      if (c == ESCAPE) {               //  Is this character the escape character?
	if (!is.get(c)) return is;     //   Yes.  Get quoted character.
	str.push_back(escaped_char(c));//   Push decoded character onto string.
      }
      else
	str.push_back(c);              //   Push ordinary character onto string.
      if (!is.get(c)) return is;       //  Read next character; return if read fails.
    }
    s = symbol(str);                   //  Load string into symbol (may be empty)
  }
  else if (c == UNDEFINED[0]) {        // Recognize the UNDEFINED token
    // The first character has already been matched; the rest of UNDEFINED must
    // follow exactly.
    for (const char* cp = &UNDEFINED[1]; *cp; ++cp)
      if (!is.get(c) || c != *cp) {
	is.clear(std::ios::failbit);   //  We didn't get the whole UNDEFINED symbol;
	                               //  note clear() replaces the state with failbit.
	return is;                     //  Characters consumed so far are not put back.
      }
    s = symbol::undefined();           //  Set s to undefined
  }
  else {                               // c doesn't begin a symbol
    is.putback(c);                     // put it back onto the stream
    is.clear(std::ios::failbit);       // set the fail bit
  }
  return is;
}


// operator<< writes s to os in a form that operator>> can read back:
//
//   * an undefined symbol is written as the UNDEFINED token;
//   * a symbol whose string is empty is written as an empty quoted string;
//   * any other symbol is written character by character, with ESCAPE placed
//     before each character for which dont_escape() is false.
//
std::ostream& operator<< (std::ostream& os, const symbol s)
{
  if (s.is_undefined()) {
    os << UNDEFINED;
    return os;
  }

  const std::string& text = s.string_reference();
  if (text.empty()) {
    os << OPENQUOTE << CLOSEQUOTE;
    return os;
  }

  const std::string::size_type length = text.size();
  for (std::string::size_type pos = 0; pos < length; ++pos) {
    const char ch = text[pos];
    if (!dont_escape(ch))
      os.put(ESCAPE);                  // ch needs escaping; emit the escape first
    os.put(ch);
  }
  return os;
}

#ifdef MAIN

// The rest of this file contains a program that tests the write/read properties of symbols

#include <sstream>
#include <vector>
#include "utility.h"

// Test driver for the write/read properties of symbols.
//
// It performs two checks:
//   1. a set of symbols (including the undefined symbol) written to a string
//      stream and read back compares equal to the original set;
//   2. symbols created before that read still compare equal to freshly
//      constructed symbols for the same strings.
//
// On a failed check a diagnostic is written to std::cerr and the program exits
// with EXIT_FAILURE.  The command-line arguments are not used.
//
int main(int argc, char** argv) {
  const size_t ns = 1000000;           // number of numeric symbols to create

  // String literals are arrays of const char, so the elements must be
  // const char*; initialising char* from a literal is ill-formed since C++11.
  const char* syms[] = { "Hello world", "1", "2.0e-5", "this", "is", "a", "test" };
  const size_t nsyms = sizeof(syms)/sizeof(syms[0]);
  typedef ext::hash_set<symbol> sS;
  sS s;
  for (size_t i = 0; i < nsyms; ++i)
    s.insert(symbol(syms[i]));
  s.insert(symbol::undefined());
  
  // Serialise the set.  NOTE(review): the set-level << and >> used here are
  // presumably supplied by utility.h -- confirm.
  std::ostringstream os;
  os << s;

  // create a lot of symbols
  //
  std::vector<symbol> ss;
  for (size_t i = 0; i < ns; ++i) {
    std::ostringstream os1;
    os1 << i;
    ss.push_back(symbol(os1.str()));  // ss will resize several times
  }  
  
  // Read the set back and compare it with the original.
  std::istringstream is(os.str());
  sS s1;
  is >> s1;
  if (s1 != s) {
    std::cerr << "Oops: these two sets are different!\n" << s << '\n' << s1 << std::endl;
    exit(EXIT_FAILURE);
  }
  
  // Each stored symbol must still equal a new symbol built from the same string.
  for (size_t i = 0; i < ns; ++i) {
    std::ostringstream os1;
    os1 << i;
    if (ss[i] != symbol(os1.str())) {
      std::cerr << "Oops: os1.str() = " << os1.str() 
		<< ", ss[" << i << "] = " << ss[i] << std::endl;
      exit(EXIT_FAILURE);
    }
  }
}

#endif // MAIN
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.