rubinius/rubinius

View on GitHub
machine/symbol_table.cpp

Summary

Maintainability
Test Coverage
#include "oniguruma.h" // Must be first.
#include "regenc.h"
#include "transcoder.h"

#include "symbol_table.hpp"
#include "exception.hpp"
#include "configuration.hpp"

#include "class/array.hpp"
#include "class/encoding.hpp"
#include "class/exception.hpp"
#include "class/string.hpp"
#include "class/symbol.hpp"

#include "diagnostics/memory.hpp"

#include "logger.hpp"

#include <iomanip>
#include <mutex>
#include <sstream>
#include <utility>

namespace rubinius {
  // Builds an empty table: allocates the diagnostics::SymbolTable record
  // (released again by the destructor) and value-initializes the lock.
  SymbolTable::SymbolTable()
    : diagnostic_(new diagnostics::SymbolTable()),
      lock_()
  {}

  // Releases the diagnostics record allocated by the constructor and clears
  // the pointer afterwards.
  SymbolTable::~SymbolTable() {
    // Deleting a null pointer is a no-op, so no explicit null test is needed.
    delete diagnostic_;
    diagnostic_ = nullptr;
  }

  // Publishes the table's diagnostics. Does nothing unless the
  // diagnostics_memory_enabled configuration option is set; otherwise the
  // diagnostic record is updated and handed to the machine for reporting.
  void SymbolTable::sweep(STATE) {
    if(!state->configuration()->diagnostics_memory_enabled) {
      return;
    }

    diagnostic_->update();
    state->machine()->report_diagnostics(diagnostic_);
  }

  // Classifies the text stored for +sym+ by inspecting its leading bytes.
  //
  // Returns:
  //   eConstant - first byte is an uppercase letter and every following
  //               character is alphanumeric in the symbol's encoding, is '_',
  //               or starts with a non-ASCII byte
  //   eCVar     - text begins with "@@"
  //   eIVar     - text begins with '@' followed by a non-digit
  //   eSystem   - text is longer than two bytes and begins with "__"
  //   eNormal   - anything else
  //
  // The text is copied into a local before inspection. This function takes no
  // lock itself; kind() in this file calls it while holding lock_.
  SymbolTable::Kind SymbolTable::detect_kind(STATE, const Symbol* sym) {
    std::string name = strings[sym->index()];
    const size_t length = name.size();
    uint8_t* bytes = reinterpret_cast<uint8_t*>(const_cast<char*>(name.c_str()));

    Encoding* encoding = Encoding::from_index(state, encodings[sym->index()]);
    OnigEncodingType* enc = encoding->encoding();

    // Constants start with A-Z, followed by alphanumeric characters or '_' or
    // non-ascii character.
    if(isupper(*bytes)) {
      uint8_t* const end = bytes + length;
      uint8_t* cursor = bytes + 1;

      while(cursor < end) {
        int n = Encoding::precise_mbclen(cursor, end, enc);

        // A byte sequence that is not a complete character in this encoding
        // disqualifies the name.
        if(!ONIGENC_MBCLEN_CHARFOUND_P(n)) {
          return SymbolTable::eNormal;
        }

        n = ONIGENC_MBCLEN_CHARFOUND_LEN(n);
        int code = ONIGENC_MBC_TO_CODE(enc, cursor, cursor + n);

        const bool allowed =
          ONIGENC_IS_CODE_ALNUM(enc, code) || *cursor == '_' || !ISASCII(*cursor);
        if(!allowed) {
          return SymbolTable::eNormal;
        }

        cursor += n;
      }

      return SymbolTable::eConstant;
    }

    if(bytes[0] == '@') {
      // A lone '@' is not a variable name.
      if(length == 1) {
        return SymbolTable::eNormal;
      }

      const uint8_t second = bytes[1];

      // A class variable begins with @@
      if(second == '@') {
        return SymbolTable::eCVar;
      }

      // An instance variable begins with @ but can't continue with a digit.
      return ISDIGIT(second) ? SymbolTable::eNormal : SymbolTable::eIVar;
    }

    // A system variable begins with __
    const bool double_underscore = bytes[0] == '_' && bytes[1] == '_';
    if(length > 2 && double_underscore) {
      return SymbolTable::eSystem;
    }

    // Everything else is normal
    return SymbolTable::eNormal;
  }

  // Returns the Kind recorded for +sym+. While the recorded value is still
  // eUnknown, detect_kind() is run and its result stored first. lock_ is held
  // for the whole call.
  SymbolTable::Kind SymbolTable::kind(STATE, const Symbol* sym) {
    std::lock_guard<locks::spinlock_mutex> guard(lock_);

    const size_t index = sym->index();

    if(kinds[index] == eUnknown) {
      kinds[index] = detect_kind(state, sym);
    }

    return kinds[index];
  }

  // Appends a new symbol record and returns its index.
  //
  // str - the symbol's bytes; taken by value and moved into the table
  // enc - encoding index stored alongside the text
  //
  // The three parallel containers (strings, encodings, kinds) each grow by one
  // entry, with the kind initially eUnknown. The table's own diagnostic and
  // the per-state memory metrics are both charged with the same byte estimate.
  //
  // This function takes no lock itself; lookup() in this file calls it while
  // holding lock_.
  size_t SymbolTable::add(STATE, std::string str, int enc) {
    // Measure before the string is moved from.
    const size_t bytes =
      str.size() + sizeof(std::string) + sizeof(int) + sizeof(Kind);

    diagnostic()->objects++;
    diagnostic()->bytes += bytes;

    // str is already a private copy, so hand its buffer to the table instead
    // of copying it a second time.
    strings.push_back(std::move(str));
    encodings.push_back(enc);
    kinds.push_back(eUnknown);

    state->diagnostics()->memory_metrics()->symbols++;
    state->diagnostics()->memory_metrics()->symbols_bytes += bytes;

    return strings.size() - 1;
  }

  // Convenience overload: looks up (or creates) the symbol for the +length+
  // bytes at +str+, using Encoding::eAscii and the state's hash seed.
  Symbol* SymbolTable::lookup(STATE, const char* str, size_t length) {
    const uint32_t seed = state->hash_seed();

    return lookup(state, str, length, Encoding::eAscii, seed);
  }

  // Convenience overload: looks up (or creates) the symbol for the contents of
  // +str+, using Encoding::eAscii and the state's hash seed.
  Symbol* SymbolTable::lookup(STATE, const std::string& str) {
    const char* data = str.data();
    const size_t length = str.size();

    return lookup(state, data, length, Encoding::eAscii, state->hash_seed());
  }

  // Returns the Symbol for the +length+ bytes at +str+ in encoding +enc+,
  // creating a new entry when none exists yet.
  //
  // str    - pointer to the symbol's bytes; need not be NUL-terminated
  // length - number of bytes at +str+
  // enc    - encoding index; two entries with equal bytes but different
  //          encodings are distinct symbols
  // seed   - seed passed to String::hash_str
  //
  // Entries are bucketed by hash. Within a bucket a candidate matches only
  // when its encoding, its length and every one of its bytes agree with the
  // request. The comparison is length-aware on purpose: a NUL-terminated
  // prefix compare would treat a stored "foobar" as equal to a requested
  // "foo", and would stop at an embedded NUL byte.
  Symbol* SymbolTable::lookup(STATE, const char* str, size_t length,
      int enc, uint32_t seed)
  {
    hashval hash = String::hash_str((unsigned char*)str, length, seed);

    // Symbols can be looked up by multiple threads at the same time.
    // This is fast operation, so we protect this with a spinlock.
    std::lock_guard<locks::spinlock_mutex> guard(lock_);

    // operator[] creates an empty bucket on first use of this hash, so the
    // "new hash" and "colliding hash" cases share the insert path below.
    SymbolIds& ids = symbols[hash];

    for(SymbolIds::const_iterator i = ids.begin(); i != ids.end(); ++i) {
      const std::string& s = strings[*i];

      if(encodings[*i] == enc
          && s.size() == length
          && s.compare(0, length, str, length) == 0) {
        return Symbol::from_index(*i);
      }
    }

    const size_t sym = add(state, std::string(str, length), enc);
    ids.push_back(sym);

    return Symbol::from_index(sym);
  }

  // Returns the Symbol for the bytes of the managed String +str+.
  //
  // Raises an argument error when +str+ is nil. The string's own encoding
  // index is used unless the string reports itself as ASCII-only, in which
  // case Encoding::eAscii is used instead.
  Symbol* SymbolTable::lookup(STATE, String* str) {
    if(str->nil_p()) {
      Exception::raise_argument_error(state, "Cannot look up Symbol from nil");
      return nullptr;
    }

    // Since we also explicitly use the size, we can safely
    // use byte_address() here.
    const char* data = (const char*) str->byte_address();
    const size_t data_size = str->byte_size();

    int encoding_index = str->encoding(state)->index();

    const bool ascii_only = CBOOL(str->ascii_only_p(state));
    if(ascii_only) {
      encoding_index = Encoding::eAscii;
    }

    return lookup(state, data, data_size, encoding_index, state->hash_seed());
  }

  // Creates a managed String holding the text of +sym+, tagged with the
  // encoding recorded for that symbol.
  //
  // Raises an argument error when +sym+ is nil. Returns a null pointer when
  // the symbol's index lies outside the table. lock_ is held for the whole
  // call.
  String* SymbolTable::lookup_string(STATE, const Symbol* sym) {
    std::lock_guard<locks::spinlock_mutex> guard(lock_);

    if(sym->nil_p()) {
      Exception::raise_argument_error(state, "Cannot look up Symbol from nil");
      return nullptr;
    }

    const size_t index = sym->index();
    if(index >= strings.size()) {
      return nullptr;
    }

    const std::string& text = strings[index];
    const int encoding_index = encodings[index];

    String* result = String::create(state, text.data(), text.size());
    result->encoding(state, Encoding::from_index(state, encoding_index));

    return result;
  }

  // Returns a reference to the std::string stored for +sym+. No bounds check
  // is performed on the symbol's index.
  //
  // NOTE(review): lock_ is released when this function returns, so the
  // reference is used by the caller without the lock. If `strings` can
  // relocate its elements when add() appends, a concurrent insert could
  // invalidate it - confirm against the container type in the header.
  std::string& SymbolTable::lookup_cppstring(const Symbol* sym) {
    std::lock_guard<locks::spinlock_mutex> guard(lock_);

    const size_t index = sym->index();
    return strings[index];
  }

  // Returns the encoding index recorded for +sym+, read while holding lock_.
  // No bounds check is performed on the symbol's index.
  int SymbolTable::lookup_encoding(const Symbol* sym) {
    std::lock_guard<locks::spinlock_mutex> guard(lock_);

    const size_t index = sym->index();
    return encodings[index];
  }

  // Returns a printable rendering of the text stored for +sym+: bytes that
  // are both printable and ASCII are emitted as-is, every other byte is
  // written as \x followed by two zero-padded lowercase hex digits.
  //
  // No bounds check is performed on the symbol's index.
  std::string SymbolTable::lookup_debug_string(const Symbol* sym) {
    std::string str;

    // Hold the spinlock only long enough to copy the text. The stream
    // formatting below allocates and does not touch the table, so it runs
    // after the lock is released.
    {
      std::lock_guard<locks::spinlock_mutex> guard(lock_);
      str = strings[sym->index()];
    }

    std::ostringstream os;

    // Both manipulators are sticky and only influence the hex escapes below;
    // std::setw must be repeated because it resets after each insertion.
    os << std::hex << std::setfill('0');

    for(const char ch : str) {
      const unsigned char byte = static_cast<unsigned char>(ch);

      if(isprint(byte) && isascii(byte)) {
        os << byte;
      } else {
        os << "\\x" << std::setw(2) << static_cast<unsigned int>(byte);
      }
    }

    return os.str();
  }

  // Returns a managed Array containing a Symbol for every id recorded in the
  // hash buckets. The array is created with room for strings.size() entries
  // and filled in bucket iteration order. lock_ is held for the whole call.
  Array* SymbolTable::all_as_array(STATE) {
    std::lock_guard<locks::spinlock_mutex> guard(lock_);

    Array* result = Array::create(state, strings.size());
    size_t next = 0;

    for(auto& bucket : symbols) {
      for(auto id : bucket.second) {
        result->set(state, next++, Symbol::from_index(state, id));
      }
    }

    return result;
  }
}