sparklemotion/nokogiri

View on GitHub
ext/nokogiri/xml_xpath_context.c

Summary

Maintainability
Test Coverage
#include <nokogiri.h>

/* Ruby class object for Nokogiri::XML::XPathContext; assigned in noko_init_xml_xpath_context(). */
VALUE cNokogiriXmlXpathContext;

/*
 * Namespace prefixes and URIs that rb_xml_xpath_context_new() registers on every new context:
 * one pair for the "nokogiri" prefix, and one for the "nokogiri-builtin" prefix under which the
 * built-in XPath functions in this file are registered.
 *
 * These constants have matching declarations in
 * ext/java/nokogiri/internals/NokogiriNamespaceContext.java
 */
static const xmlChar *NOKOGIRI_PREFIX = (const xmlChar *)"nokogiri";
static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/default_ns/ruby/extensions_functions";
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";

static void
xml_xpath_context_deallocate(void *data)
{
  xmlXPathContextPtr c_context = data;
  xmlXPathFreeContext(c_context);
}

/*
 * Ruby typed-data descriptor used to wrap an xmlXPathContext in a Ruby object (see
 * rb_xml_xpath_context_new) and to unwrap it again with TypedData_Get_Struct.
 *
 * Only a free callback is provided; no mark callback is set, so Ruby objects referenced from the
 * wrapped context are not kept alive through this descriptor.
 */
static const rb_data_type_t xml_xpath_context_type = {
  .wrap_struct_name = "xmlXPathContext",
  .function = {
    /* frees the underlying libxml2 context when the Ruby wrapper is collected */
    .dfree = xml_xpath_context_deallocate,
  },
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
};

/*
 * Find a CSS class name inside the value of an HTML element's `class` attribute.
 *
 * str: the whitespace-separated list of class names to search (may be NULL)
 * val: the class name to look for (may be NULL)
 *
 * Returns a pointer into str at the start of the first word that is exactly val, or NULL when
 * there is no such word or when either argument is NULL. An empty val matches immediately and
 * returns str itself.
 */
static const xmlChar *
builtin_css_class(const xmlChar *str, const xmlChar *val)
{
  if ((str == NULL) || (val == NULL)) {
    return NULL;
  }

  const int needle_len = xmlStrlen(val);
  if (needle_len == 0) {
    return str;
  }

  /* each iteration starts with `word` at the beginning of a word (or at leading whitespace) */
  for (const xmlChar *word = str; *word != 0;) {
    if ((*word == *val) && (xmlStrncmp(word, val, needle_len) == 0)) {
      /* a prefix match only counts if the word ends right here */
      const xmlChar *after_match = word + needle_len;

      if ((*after_match == 0) || IS_BLANK_CH(*after_match)) {
        return word;
      }
    }

    /* skip the remainder of the current word ... */
    while ((*word != 0) && !IS_BLANK_CH(*word)) {
      word++;
    }

    /* ... and the whitespace that separates it from the next one */
    while ((*word != 0) && IS_BLANK_CH(*word)) {
      word++;
    }
  }

  return NULL;
}

/*
 * xmlXPathFunction wrapper around builtin_css_class().
 *
 * Expects exactly two arguments on the XPath stack: the haystack (a `class` attribute value)
 * followed by the needle (a class name). Both are cast to strings. Pushes a boolean that is true
 * when the needle is one of the whitespace-separated words of the haystack.
 */
static void
xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
{
  xmlXPathObjectPtr c_haystack;
  xmlXPathObjectPtr c_needle;
  int found;

  CHECK_ARITY(2);

  /* arguments come off the stack in reverse order: the needle is on top */
  CAST_TO_STRING;
  c_needle = valuePop(ctxt);
  if ((c_needle == NULL) || (c_needle->type != XPATH_STRING)) {
    xmlXPathFreeObject(c_needle);
    XP_ERROR(XPATH_INVALID_TYPE);
  }

  CAST_TO_STRING;
  c_haystack = valuePop(ctxt);
  if ((c_haystack == NULL) || (c_haystack->type != XPATH_STRING)) {
    xmlXPathFreeObject(c_haystack);
    xmlXPathFreeObject(c_needle);
    XP_ERROR(XPATH_INVALID_TYPE);
  }

  found = (builtin_css_class(c_haystack->stringval, c_needle->stringval) != NULL) ? 1 : 0;
  valuePush(ctxt, xmlXPathNewBoolean(found));

  xmlXPathFreeObject(c_haystack);
  xmlXPathFreeObject(c_needle);
}


/*
 * xmlXPathFunction that tests whether the context node's name equals the single string argument.
 * Used by HTML5 CSS queries that should match on local name and ignore namespaces.
 *
 * Pushes a boolean result onto the XPath stack.
 */
static void
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
{
  xmlXPathObjectPtr c_expected_name;
  int names_match;

  assert(ctxt->context->node);

  CHECK_ARITY(1);
  CAST_TO_STRING;
  CHECK_TYPE(XPATH_STRING);
  c_expected_name = valuePop(ctxt);

  names_match = xmlStrEqual(ctxt->context->node->name, c_expected_name->stringval);
  valuePush(ctxt, xmlXPathNewBoolean(names_match));

  xmlXPathFreeObject(c_expected_name);
}


/*
 * call-seq:
 *   register_ns(prefix, uri) → Nokogiri::XML::XPathContext
 *
 * Register the namespace with +prefix+ and +uri+ for use in future queries.
 *
 * [Returns] +self+
 */
static VALUE
rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
{
  xmlXPathContextPtr c_context;
  const xmlChar *c_prefix;
  const xmlChar *c_uri;

  TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context);

  /* StringValueCStr raises if the argument is not a String or contains a NUL byte */
  c_prefix = (const xmlChar *)StringValueCStr(prefix);
  c_uri = (const xmlChar *)StringValueCStr(uri);

  xmlXPathRegisterNs(c_context, c_prefix, c_uri);

  return rb_context;
}

/*
 * call-seq:
 *   register_variable(name, value) → Nokogiri::XML::XPathContext
 *
 * Register the variable +name+ with +value+ for use in future queries.
 *
 * [Returns] +self+
 */
static VALUE
rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
{
  xmlXPathContextPtr c_context;
  xmlXPathObjectPtr xmlValue;
  const char *c_value;
  const xmlChar *c_name;

  TypedData_Get_Struct(
    rb_context,
    xmlXPathContext,
    &xml_xpath_context_type,
    c_context
  );

  /*
   * Convert both Ruby strings before allocating anything: StringValueCStr raises when its
   * argument is not a String or contains a NUL byte, and an exception raised after the XPath
   * object has been created would leak it.
   */
  c_value = StringValueCStr(value);
  c_name = (const xmlChar *)StringValueCStr(name);

  /* the variable's value is always registered as an XPath string */
  xmlValue = xmlXPathNewCString(c_value);

  xmlXPathRegisterVariable(
    c_context,
    c_name,
    xmlValue
  );

  return rb_context;
}


/*
 *  Translate an XPath object into the Ruby object of the matching type:
 *
 *  - XPATH_STRING  → String (the object's string buffer is freed here)
 *  - XPATH_NODESET → NodeSet wrapping the object's node set, owned by the context's document
 *  - XPATH_NUMBER  → Float
 *  - XPATH_BOOLEAN → true or false
 *
 *  Returns Qundef for any other XPath type. The XPath object itself is never freed here; that is
 *  left to the caller.
 */
static VALUE
xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
{
  assert(c_context->doc);
  assert(DOC_RUBY_OBJECT_TEST(c_context->doc));

  if (c_xpath_object->type == XPATH_STRING) {
    VALUE rb_string = NOKOGIRI_STR_NEW2(c_xpath_object->stringval);
    xmlFree(c_xpath_object->stringval);
    return rb_string;
  }

  if (c_xpath_object->type == XPATH_NODESET) {
    VALUE rb_document = DOC_RUBY_OBJECT(c_context->doc);
    return noko_xml_node_set_wrap(c_xpath_object->nodesetval, rb_document);
  }

  if (c_xpath_object->type == XPATH_NUMBER) {
    return rb_float_new(c_xpath_object->floatval);
  }

  if (c_xpath_object->type == XPATH_BOOLEAN) {
    if (c_xpath_object->boolval == 1) {
      return Qtrue;
    }
    return Qfalse;
  }

  return Qundef;
}

/*
 *  Call +method_name+ on +rb_xpath_handler+ with the arguments of an XPath function call, and
 *  push the method's return value back onto the XPath evaluation stack.
 *
 *  The +argc+ arguments are popped from +ctxt+ and converted with xpath2ruby(); an argument that
 *  cannot be converted is passed as its XPath string value instead.
 *
 *  The Ruby return value is converted as follows:
 *
 *  - Float or Integer → XPath number
 *  - String           → XPath string
 *  - true / false     → XPath boolean
 *  - nil              → nothing is pushed
 *  - Array            → a NodeSet is constructed from it, and a copy of its node set is pushed
 *  - NodeSet          → a copy of its node set is pushed
 *
 *  Any other return value raises RuntimeError ("Invalid return type").
 */
void
Nokogiri_marshal_xpath_funcall_and_return_values(
  xmlXPathParserContextPtr ctxt,
  int argc,
  VALUE rb_xpath_handler,
  const char *method_name
)
{
  VALUE rb_retval;
  VALUE *argv;
  VALUE rb_node_set = Qnil;
  xmlNodeSetPtr c_node_set = NULL;
  xmlXPathObjectPtr c_xpath_object;

  assert(ctxt->context->doc);
  assert(DOC_RUBY_OBJECT_TEST(ctxt->context->doc));

  /* argv lives on the C heap, so each slot is registered with the GC while it holds a VALUE */
  argv = (VALUE *)ruby_xcalloc((size_t)argc, sizeof(VALUE));
  for (int j = 0 ; j < argc ; ++j) {
    rb_gc_register_address(&argv[j]);
  }

  /* arguments are popped last-to-first, so fill argv from the end */
  for (int j = argc - 1 ; j >= 0 ; --j) {
    c_xpath_object = valuePop(ctxt);
    argv[j] = xpath2ruby(c_xpath_object, ctxt->context);
    if (argv[j] == Qundef) {
      /*
       * xmlXPathCastToString() returns a newly allocated string that the caller owns: copy it
       * into a Ruby String and then release it, otherwise it is leaked.
       */
      xmlChar *c_string = xmlXPathCastToString(c_xpath_object);
      argv[j] = NOKOGIRI_STR_NEW2(c_string);
      xmlFree(c_string);
    }
    xmlXPathFreeNodeSetList(c_xpath_object);
  }

  rb_retval = rb_funcall2(
                rb_xpath_handler,
                rb_intern((const char *)method_name),
                argc,
                argv
              );

  for (int j = 0 ; j < argc ; ++j) {
    rb_gc_unregister_address(&argv[j]);
  }
  ruby_xfree(argv);

  switch (TYPE(rb_retval)) {
    case T_FLOAT:
    case T_BIGNUM:
    case T_FIXNUM:
      xmlXPathReturnNumber(ctxt, NUM2DBL(rb_retval));
      break;
    case T_STRING:
      xmlXPathReturnString(ctxt, xmlCharStrdup(StringValueCStr(rb_retval)));
      break;
    case T_TRUE:
      xmlXPathReturnTrue(ctxt);
      break;
    case T_FALSE:
      xmlXPathReturnFalse(ctxt);
      break;
    case T_NIL:
      break;
    case T_ARRAY: {
      VALUE construct_args[2] = { DOC_RUBY_OBJECT(ctxt->context->doc), rb_retval };
      rb_node_set = rb_class_new_instance(2, construct_args, cNokogiriXmlNodeSet);
      c_node_set = noko_xml_node_set_unwrap(rb_node_set);
      xmlXPathReturnNodeSet(ctxt, xmlXPathNodeSetMerge(NULL, c_node_set));
    }
    break;
    case T_DATA:
      if (rb_obj_is_kind_of(rb_retval, cNokogiriXmlNodeSet)) {
        c_node_set = noko_xml_node_set_unwrap(rb_retval);
        /* Copy the node set, otherwise it will get GC'd. */
        xmlXPathReturnNodeSet(ctxt, xmlXPathNodeSetMerge(NULL, c_node_set));
        break;
      }
    /* fallthrough: any other wrapped object is an invalid return type */
    default:
      rb_raise(rb_eRuntimeError, "Invalid return type");
  }
}

/*
 * xmlXPathFunction returned by handler_lookup() for every custom handler function.
 *
 * Reads the Ruby handler object from the XPath context's userData, takes the context's
 * `function` field as the name of the method to call, and delegates the call and the conversion
 * of arguments and return value to Nokogiri_marshal_xpath_funcall_and_return_values().
 */
static void
method_caller(xmlXPathParserContextPtr ctxt, int argc)
{
  assert(ctxt);
  assert(ctxt->context);
  assert(ctxt->context->userData);
  assert(ctxt->context->function);

  Nokogiri_marshal_xpath_funcall_and_return_values(
    ctxt,
    argc,
    (VALUE)(ctxt->context->userData),
    (const char *)(ctxt->context->function)
  );
}

static xmlXPathFunction
handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
{
  VALUE rb_handler = (VALUE)data;
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
    if (c_ns_uri == NULL) {
      NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
                            c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
    }
    return method_caller;
  }

  return NULL;
}

/*
 * printf-style error callback installed with xmlSetGenericErrorFunc() during evaluation.
 *
 * data: the Ruby Array that collects errors (a TypeError is raised if it is not an Array)
 * msg:  printf-style format string, followed by its arguments
 *
 * Formats the message, wraps it in a new XPath syntax error exception, and appends that
 * exception to the array. Nothing is raised for the reported error itself.
 */
PRINTFLIKE_DECL(2, 3)
static void
generic_exception_pusher(void *data, const char *msg, ...)
{
  VALUE rb_error_list = (VALUE)data;
  VALUE rb_text;

  Check_Type(rb_error_list, T_ARRAY);

#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
  /* It is not currently possible to pass var args from native
     functions to sulong, so we work around the issue here. */
  rb_text = rb_sprintf("generic_exception_pusher: %s", msg);
#else
  va_list c_varargs;
  va_start(c_varargs, msg);
  rb_text = rb_vsprintf(msg, c_varargs);
  va_end(c_varargs);
#endif

  rb_ary_push(rb_error_list, rb_exc_new_str(cNokogiriXmlXpathSyntaxError, rb_text));
}

/*
 * call-seq:
 *   evaluate(search_path, handler = nil) → Object
 *
 * Evaluate the +search_path+ query.
 *
 * [Returns] an object of the appropriate type for the query, which could be +NodeSet+, a +String+,
 * a +Float+, or a boolean.
 */
static VALUE
rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
{
  VALUE search_path, xpath_handler;
  VALUE retval = Qnil;
  xmlXPathContextPtr c_context;
  xmlXPathObjectPtr xpath;
  xmlChar *query;
  VALUE errors = rb_ary_new();

  TypedData_Get_Struct(
    rb_context,
    xmlXPathContext,
    &xml_xpath_context_type,
    c_context
  );

  if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
    xpath_handler = Qnil;
  }

  query = (xmlChar *)StringValueCStr(search_path);

  if (Qnil != xpath_handler) {
    /* FIXME: not sure if this is the correct place to shove private data. */
    c_context->userData = (void *)xpath_handler;
    xmlXPathRegisterFuncLookup(
      c_context,
      handler_lookup,
      (void *)xpath_handler
    );
  } else {
    /*
     * Make sure no handler from an earlier call (for example one that was interrupted by an
     * exception) is still attached to the context.
     */
    c_context->userData = NULL;
    xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
  }

  /* collect libxml2 errors in `errors` for the duration of the evaluation only */
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
  xmlSetGenericErrorFunc((void *)errors, generic_exception_pusher);

  xpath = xmlXPathEvalExpression(query, c_context);

  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlSetGenericErrorFunc(NULL, NULL);

  /*
   * The handler is only guaranteed to be alive during this call: the context does not mark it
   * for the GC. Detach it so that a later evaluation cannot dispatch to a stale object.
   */
  c_context->userData = NULL;
  xmlXPathRegisterFuncLookup(c_context, NULL, NULL);

  if (xpath == NULL) {
    VALUE rb_error = rb_ary_entry(errors, 0);

    if (NIL_P(rb_error)) {
      /* no error was captured; raise something meaningful rather than passing nil along */
      rb_raise(cNokogiriXmlXpathSyntaxError, "XPath evaluation failed: %s", (const char *)query);
    }
    rb_exc_raise(rb_error);
  }

  retval = xpath2ruby(xpath, c_context);
  if (retval == Qundef) {
    /* result types that have no Ruby equivalent are returned as an empty NodeSet */
    retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
  }

  xmlXPathFreeNodeSetList(xpath);

  return retval;
}

/*
 * call-seq:
 *   new(node)
 *
 * Create a new XPathContext with +node+ as the context node.
 */
static VALUE
rb_xml_xpath_context_new(VALUE klass, VALUE rb_node)
{
  xmlNodePtr node;
  xmlXPathContextPtr c_context;
  VALUE rb_context;

  Noko_Node_Get_Struct(rb_node, xmlNode, node);

#if LIBXML_VERSION < 21000
  /* deprecated in 40483d0 */
  xmlXPathInit();
#endif

  c_context = xmlXPathNewContext(node->doc);
  if (c_context == NULL) {
    /* allocation failed inside libxml2; fail loudly instead of dereferencing NULL below */
    rb_raise(rb_eRuntimeError, "Could not create an XPath context");
  }
  c_context->node = node;

  /* every context knows the namespaces for custom handler functions and for the builtins */
  xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
  xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);

  /* builtin functions, callable as nokogiri-builtin:css-class() and nokogiri-builtin:local-name-is() */
  xmlXPathRegisterFuncNS(
    c_context,
    (const xmlChar *)"css-class",
    NOKOGIRI_BUILTIN_URI,
    xpath_builtin_css_class
  );
  xmlXPathRegisterFuncNS(
    c_context,
    (const xmlChar *)"local-name-is",
    NOKOGIRI_BUILTIN_URI,
    xpath_builtin_local_name_is
  );

  /* from here on the Ruby object owns c_context and frees it through xml_xpath_context_type */
  rb_context = TypedData_Wrap_Struct(
                 klass,
                 &xml_xpath_context_type,
                 c_context
               );
  return rb_context;
}

/*
 * Define the XPathContext class under mNokogiriXml and attach its methods.
 *
 * Instances can only be created through the `new` singleton method (the default allocator is
 * undefined), which wraps a libxml2 XPath context.
 */
void
noko_init_xml_xpath_context(void)
{
  /*
   * XPathContext is the entry point for searching a +Document+ by using XPath.
   */
  cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject);

  rb_undef_alloc_func(cNokogiriXmlXpathContext);

  rb_define_singleton_method(cNokogiriXmlXpathContext, "new", rb_xml_xpath_context_new, 1);

  /* evaluate takes a variable number of arguments (arity -1): the query and an optional handler */
  rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1);
  rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2);
  rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2);
}