sparklemotion/nokogiri

View on GitHub
ext/nokogiri/xml_sax_parser_context.c

Summary

Maintainability
Test Coverage
#include <nokogiri.h>

VALUE cNokogiriXmlSaxParserContext ;

static ID id_read;

static void
xml_sax_parser_context_free(void *data)
{
  xmlParserCtxtPtr ctxt = data;
  ctxt->sax = NULL;
  xmlFreeParserCtxt(ctxt);
}

/*
 *  note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
 *  so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
 */
static const rb_data_type_t xml_parser_ctxt_type = {
  .wrap_struct_name = "xmlParserCtxt",
  .function = {
    .dfree = xml_sax_parser_context_free,
  },
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
};

xmlParserCtxtPtr
noko_xml_sax_parser_context_unwrap(VALUE rb_context)
{
  xmlParserCtxtPtr c_context;
  TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_parser_ctxt_type, c_context);
  return c_context;
}

VALUE
noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
{
  return TypedData_Wrap_Struct(klass, &xml_parser_ctxt_type, c_context);
}


/*
 * call-seq:
 *  parse_io(io, encoding)
 *
 * Parse +io+ object with +encoding+
 */
static VALUE
parse_io(VALUE klass, VALUE io, VALUE encoding)
{
  xmlParserCtxtPtr ctxt;
  xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);

  if (!rb_respond_to(io, id_read)) {
    rb_raise(rb_eTypeError, "argument expected to respond to :read");
  }

  ctxt = xmlCreateIOParserCtxt(NULL, NULL,
                               (xmlInputReadCallback)noko_io_read,
                               (xmlInputCloseCallback)noko_io_close,
                               (void *)io, enc);
  if (!ctxt) {
    rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
  }

  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  return noko_xml_sax_parser_context_wrap(klass, ctxt);
}

/*
 * call-seq:
 *  parse_file(filename)
 *
 * Parse file given +filename+
 */
static VALUE
parse_file(VALUE klass, VALUE filename)
{
  xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));

  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  return noko_xml_sax_parser_context_wrap(klass, ctxt);
}

/*
 * call-seq:
 *  parse_memory(data)
 *
 * Parse the XML stored in memory in +data+
 */
static VALUE
parse_memory(VALUE klass, VALUE data)
{
  xmlParserCtxtPtr ctxt;

  Check_Type(data, T_STRING);

  if (!(int)RSTRING_LEN(data)) {
    rb_raise(rb_eRuntimeError, "data cannot be empty");
  }

  ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
                                   (int)RSTRING_LEN(data));
  if (ctxt->sax) {
    xmlFree(ctxt->sax);
    ctxt->sax = NULL;
  }

  return noko_xml_sax_parser_context_wrap(klass, ctxt);
}

static VALUE
parse_doc(VALUE ctxt_val)
{
  xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
  xmlParseDocument(ctxt);
  return Qnil;
}

static VALUE
parse_doc_finalize(VALUE ctxt_val)
{
  xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;

  if (NULL != ctxt->myDoc) {
    xmlFreeDoc(ctxt->myDoc);
  }

  NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
  return Qnil;
}

/*
 * call-seq:
 *  parse_with(sax_handler)
 *
 * Use +sax_handler+ and parse the current document
 */
static VALUE
parse_with(VALUE self, VALUE sax_handler)
{
  xmlParserCtxtPtr ctxt;
  xmlSAXHandlerPtr sax;

  if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
    rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
  }

  ctxt = noko_xml_sax_parser_context_unwrap(self);
  sax = noko_sax_handler_unwrap(sax_handler);

  ctxt->sax = sax;
  ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

  xmlSetStructuredErrorFunc(NULL, NULL);

  rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);

  return Qnil;
}

/*
 * call-seq:
 *  replace_entities=(boolean)
 *
 * Should this parser replace entities?  &amp; will get converted to '&' if
 * set to true
 */
static VALUE
set_replace_entities(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);

  if (Qfalse == value) {
    ctxt->replaceEntities = 0;
  } else {
    ctxt->replaceEntities = 1;
  }

  return value;
}

/*
 * call-seq:
 *  replace_entities
 *
 * Should this parser replace entities?  &amp; will get converted to '&' if
 * set to true
 */
static VALUE
get_replace_entities(VALUE self)
{
  xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);

  if (0 == ctxt->replaceEntities) {
    return Qfalse;
  } else {
    return Qtrue;
  }
}

/*
 * call-seq: line
 *
 * Get the current line the parser context is processing.
 */
static VALUE
line(VALUE self)
{
  xmlParserInputPtr io;
  xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);

  io = ctxt->input;
  if (io) {
    return INT2NUM(io->line);
  }

  return Qnil;
}

/*
 * call-seq: column
 *
 * Get the current column the parser context is processing.
 */
static VALUE
column(VALUE self)
{
  xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
  xmlParserInputPtr io;

  io = ctxt->input;
  if (io) {
    return INT2NUM(io->col);
  }

  return Qnil;
}

/*
 * call-seq:
 *  recovery=(boolean)
 *
 * Should this parser recover from structural errors? It will not stop processing
 * file on structural errors if set to true
 */
static VALUE
set_recovery(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);

  if (value == Qfalse) {
    ctxt->recovery = 0;
  } else {
    ctxt->recovery = 1;
  }

  return value;
}

/*
 * call-seq:
 *  recovery
 *
 * Should this parser recover from structural errors? It will not stop processing
 * file on structural errors if set to true
 */
static VALUE
get_recovery(VALUE self)
{
  xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);

  if (ctxt->recovery == 0) {
    return Qfalse;
  } else {
    return Qtrue;
  }
}

void
noko_init_xml_sax_parser_context(void)
{
  cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);

  rb_undef_alloc_func(cNokogiriXmlSaxParserContext);

  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);

  rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
  rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
  rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
  rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
  rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
  rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
  rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);

  id_read = rb_intern("read");
}