/*
 * sparklemotion/nokogiri
 *
 * View on GitHub
 * ext/java/nokogiri/internals/ParserContext.java
 *
 * Summary
 *
 * Maintainability
 * A
 * 1 hr
 * Test Coverage
 */
package nokogiri.internals;

import static nokogiri.internals.NokogiriHelpers.rubyStringToString;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.concurrent.Callable;

import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.IOInputStream;
import org.xml.sax.InputSource;

/**
 * Base class for the various parser contexts.  Handles converting
 * Ruby objects to InputSource objects.
 *
 * @author Patrick Mahoney <pat@polycrystal.org>
 * @author Yoko Harada <yokolet@gmail.com>
 */
public abstract class ParserContext extends RubyObject
{
  private static final long serialVersionUID = 1L;

  // Input source handed out by getInputSource(); each set*InputSource*
  // method in this class replaces it with a fresh instance.
  protected InputSource source = null;
  // Neither read nor written in this class.
  // NOTE(review): presumably maintained by subclasses -- confirm.
  protected IRubyObject detected_encoding = null;
  // Size value recorded by setStringInputSource for String input; stays -1
  // until that method has been called.
  protected int stringDataSize = -1;
  // Encoding name applied to the input source. setStringInputSource
  // overwrites it when the Ruby string's encoding maps to a valid name.
  protected String java_encoding;

  /**
   * Creates a context whose Ruby class is the runtime's <code>Object</code>.
   *
   * @param runtime the Ruby runtime this object belongs to
   */
  public
  ParserContext(Ruby runtime)
  {
    // default to class 'Object' because this class isn't exposed to Ruby
    super(runtime, runtime.getObject());
  }

  /**
   * Creates a context with an explicit Ruby class.
   *
   * @param runtime the Ruby runtime this object belongs to
   * @param klass the Ruby class passed through to the superclass constructor
   */
  public
  ParserContext(Ruby runtime, RubyClass klass)
  {
    super(runtime, klass);
  }

  /**
   * Returns the current input source.
   *
   * @return the input source, or <code>null</code> if none of the setter
   *         methods has been called yet
   */
  protected InputSource
  getInputSource()
  {
    return source;
  }

  /**
   * Installs a new input source that reads from a Ruby IO-like object.
   *
   * The previous input source is discarded and the system id is set from
   * <code>url</code> before <code>data</code> is validated, so a failed
   * call still leaves a fresh (stream-less) input source behind.
   *
   * @param context the current thread context
   * @param data a Ruby object that must respond to <code>read</code>
   * @param url the document location used as system id (see setUrl)
   * @throws org.jruby.exceptions.RaiseException a Ruby TypeError when
   *         <code>data</code> does not respond to <code>read</code>
   */
  public void
  setIOInputSource(ThreadContext context, IRubyObject data, IRubyObject url)
  {
    source = new InputSource();
    ParserContext.setUrl(context, source, url);

    final boolean readable = data.respondsTo("read");
    if (!readable) {
      throw context.getRuntime().newTypeError("must respond to :read");
    }

    final InputStream byteStream = new IOInputStream(data);
    source.setByteStream(byteStream);

    // Only override the encoding when one has been determined.
    if (java_encoding == null) {
      return;
    }
    source.setEncoding(java_encoding);
  }

  /**
   * Installs a new input source that reads the bytes of a Ruby String.
   *
   * The previous input source is discarded and the system id is set from
   * <code>url</code> before <code>data</code> is validated. When the
   * string's Ruby encoding maps to a valid encoding name,
   * <code>java_encoding</code> is updated to it; otherwise the previous
   * value is kept. <code>stringDataSize</code> is set to the number of
   * bytes in the string.
   *
   * @param context the current thread context
   * @param data the document text; must be a Ruby String
   * @param url the document location used as system id (see setUrl)
   * @throws org.jruby.exceptions.RaiseException a Ruby TypeError when
   *         <code>data</code> is not a String
   */
  public void
  setStringInputSource(ThreadContext context, IRubyObject data, IRubyObject url)
  {
    source = new InputSource();
    ParserContext.setUrl(context, source, url);

    if (!(data instanceof RubyString)) {
      throw context.getRuntime().newTypeError("must be kind_of String");
    }

    RubyString stringData = (RubyString) data;

    IRubyObject rubyEncoding = stringData.encoding(context);
    if (rubyEncoding != null) {
      String encName = NokogiriHelpers.getValidEncodingOrNull(rubyEncoding.asString());
      if (encName != null) {
        java_encoding = encName;
      }
    }

    ByteList bytes = stringData.getByteList();

    // ByteList.length() is the count of valid bytes starting at begin();
    // subtracting begin() again under-counts (and can go negative) for
    // strings that share a backing array at a non-zero offset.
    stringDataSize = bytes.length();
    ByteArrayInputStream stream = new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), stringDataSize);
    source.setByteStream(stream);
    source.setEncoding(java_encoding);
  }

  /**
   * Sets the system id of <code>source</code> from a Ruby string.
   *
   * The string is first tried as a URI that can be converted to a URL.
   * If that fails for any reason it is treated as a file path: absolute
   * paths are used verbatim, relative ones are resolved against the
   * runtime's current directory (canonicalised when possible). Nothing
   * happens when <code>url</code> converts to <code>null</code>.
   *
   * @param context the current thread context, used to look up the
   *        runtime's current directory
   * @param source the input source whose system id is updated
   * @param url Ruby string holding a URL or file path
   */
  public static void
  setUrl(ThreadContext context, InputSource source, IRubyObject url)
  {
    final String path = rubyStringToString(url);
    if (path == null) {
      return;
    }

    String systemId;
    try {
      systemId = URI.create(path).toURL().toString();
    } catch (Exception ex) {
      // fallback to the old behavior
      if (new File(path).isAbsolute()) {
        systemId = path;
      } else {
        // Dir.chdir might be called at some point before this, so resolve
        // against the runtime's notion of the current directory.
        final File resolved = new File(context.getRuntime().getCurrentDirectory(), path);
        try {
          systemId = resolved.getCanonicalPath();
        } catch (IOException e) {
          systemId = resolved.getAbsolutePath();
        }
      }
    }
    source.setSystemId(systemId);
  }

  /**
   * Sets the encoding on the current input source.
   *
   * An input source must already have been installed by one of the
   * setter methods; <code>source</code> is dereferenced without a null check.
   *
   * @param encoding encoding name passed through to the input source
   */
  protected void
  setEncoding(String encoding)
  {
    source.setEncoding(encoding);
  }

  /**
   * Set the InputSource to read from <code>file</code>, a String filename.
   *
   * Only the system id of a fresh InputSource is set here (via setUrl);
   * no byte or character stream is attached by this method.
   *
   * @param context the current thread context
   * @param file Ruby string naming the file
   */
  public void
  setInputSourceFile(ThreadContext context, IRubyObject file)
  {
    source = new InputSource();
    ParserContext.setUrl(context, source, file);
  }

  /**
   * Set the InputSource from <code>stream</code>.
   *
   * The previous input source is replaced; neither a system id nor an
   * encoding is set on the new one.
   *
   * @param stream byte stream the new input source reads from
   */
  public void
  setInputSource(InputStream stream)
  {
    source = new InputSource(stream);
  }

  /**
   * Read-only view of a Nokogiri parser option bit field.
   *
   * Each public flag reports whether the corresponding bit was set in the
   * value given to the constructor. <code>strict</code> is the exception:
   * it is true exactly when the <code>RECOVER</code> bit is absent.
   */
  public static class Options
  {
    protected static final long STRICT = 0;
    protected static final long RECOVER = 1L << 0;
    protected static final long NOENT = 1L << 1;
    protected static final long DTDLOAD = 1L << 2;
    protected static final long DTDATTR = 1L << 3;
    protected static final long DTDVALID = 1L << 4;
    protected static final long NOERROR = 1L << 5;
    protected static final long NOWARNING = 1L << 6;
    protected static final long PEDANTIC = 1L << 7;
    protected static final long NOBLANKS = 1L << 8;
    protected static final long SAX1 = 1L << 9;
    protected static final long XINCLUDE = 1L << 10;
    protected static final long NONET = 1L << 11;
    protected static final long NODICT = 1L << 12;
    protected static final long NSCLEAN = 1L << 13;
    protected static final long NOCDATA = 1L << 14;
    protected static final long NOXINCNODE = 1L << 15;

    public final boolean strict;
    public final boolean recover;
    public final boolean noEnt;
    public final boolean dtdLoad;
    public final boolean dtdAttr;
    public final boolean dtdValid;
    public final boolean noError;
    public final boolean noWarning;
    public final boolean pedantic;
    public final boolean noBlanks;
    public final boolean sax1;
    public final boolean xInclude;
    public final boolean noNet;
    public final boolean noDict;
    public final boolean nsClean;
    public final boolean noCdata;
    public final boolean noXIncNode;

    /**
     * Tells whether every bit of <code>mask</code> is present in
     * <code>options</code>.
     *
     * @param options the option bit field
     * @param mask the bits to look for
     * @return true when no bit of <code>mask</code> is missing
     */
    protected static boolean
    test(long options, long mask)
    {
      final long missing = mask & ~options;
      return missing == 0;
    }

    /**
     * Decodes <code>options</code> into the individual flags.
     *
     * @param options the option bit field
     */
    public
    Options(long options)
    {
      this.recover = test(options, RECOVER);
      // strict is simply the absence of the RECOVER bit
      this.strict = !this.recover;
      this.noEnt = test(options, NOENT);
      this.dtdLoad = test(options, DTDLOAD);
      this.dtdAttr = test(options, DTDATTR);
      this.dtdValid = test(options, DTDVALID);
      this.noError = test(options, NOERROR);
      this.noWarning = test(options, NOWARNING);
      this.pedantic = test(options, PEDANTIC);
      this.noBlanks = test(options, NOBLANKS);
      this.sax1 = test(options, SAX1);
      this.xInclude = test(options, XINCLUDE);
      this.noNet = test(options, NONET);
      this.noDict = test(options, NODICT);
      this.nsClean = test(options, NSCLEAN);
      this.noCdata = test(options, NOCDATA);
      this.noXIncNode = test(options, NOXINCNODE);
    }
  }

  /*
  public static class NokogiriXIncludeEntityResolver implements org.xml.sax.EntityResolver {
      InputSource source;
      public NokogiriXIncludeEntityResolver(InputSource source) {
          this.source = source;
      }

      @Override
      public InputSource resolveEntity(String publicId, String systemId)
              throws SAXException, IOException {
          if (systemId != null) source.setSystemId(systemId);
          if (publicId != null) source.setPublicId(publicId);
          return source;
      }
  } */

  /**
   * Base for parse jobs that can be submitted as a {@link Callable}.
   *
   * Holds the thread context, handler and parser context a job works
   * with; <code>call()</code> is left to subclasses.
   *
   * @param <T> the concrete parser context type the task returns
   */
  public static abstract class ParserTask<T extends ParserContext> implements Callable<T>
  {

    // Captured at construction time and kept for the life of the task.
    protected final ThreadContext context; // TODO does not seem like a good idea!?
    // Ruby-side handler object associated with this task.
    protected final IRubyObject handler;
    // Parser context this task operates on.
    protected final T parser;

    /**
     * Stores the collaborators for later use by subclasses.
     *
     * @param context the thread context to keep
     * @param handler the Ruby handler object
     * @param parser the parser context
     */
    protected
    ParserTask(ThreadContext context, IRubyObject handler, T parser)
    {
      this.context = context;
      this.handler = handler;
      this.parser = parser;
    }

  }

}