nichtich/Pandoc-Elements

View on GitHub
lib/Pandoc/Elements.pm

Summary

Maintainability
Test Coverage
package Pandoc::Elements;
use strict;
## no critic (ProhibitNoStrict, ProhibitSubroutinePrototypes)
use warnings;
use 5.010001;

our $VERSION = '0.38';

use Carp;
use JSON qw(decode_json);
use Scalar::Util qw(blessed reftype);
use Pandoc::Walker qw(walk);
use Pandoc::Version;


our $PANDOC_VERSION; # a string like '1.16'
$PANDOC_VERSION ||= eval { Pandoc::Version->new($ENV{PANDOC_VERSION}) };

# internal variables

my $PANDOC_API_MIN = Pandoc::Version->new('1.12.3');    # since pandoc 1.12.1
my $PANDOC_BIN_MIN = Pandoc::Version->new('1.12.1');

# release version => minimal required api version
my @REQUIRED_API = map { Pandoc::Version->new($_) }
    '1.19'   => '1.17',  # pandoc 1.19 has api 1.17.0.4, compatible with api 1.17
    '1.18'   => '1.17',  # pandoc 1.18 has api 1.17.0.4, compatible with api 1.17
    '1.16'   => '1.16',  # pandoc 1.16 has api 1.16
    '1.17'   => '1.16',  # pandoc 1.17 has api 1.16
;

sub _as_pandoc_version {
    (blessed $_[0] and $_[0]->isa('Pandoc::Version'))
        ? $_[0] : Pandoc::Version->new($_[0])
}

sub pandoc_version {
    if (@_) {
        my $doc = shift;
        if (@_) {
            $doc->api_version(
                _minimum_pandoc_api_for(@_)
                    // croak "pandoc version not supported"
            );
        }
        _minimum_pandoc_version_for_api($doc->api_version);
    } elsif (defined $PANDOC_VERSION) {
        _as_pandoc_version($PANDOC_VERSION)
    } else {
        $REQUIRED_API[0]
    }
}

sub _minimum_pandoc_version_for_api {
    my $api = shift;

    my $version;

    foreach (grep { $_ % 2} 0 .. @REQUIRED_API) { # 1,3,...
        if ($api->match($REQUIRED_API[$_]) ) {
            if (!$version or $version > $REQUIRED_API[$_-1]) {
                $version = $REQUIRED_API[$_-1]
            }
        }
    }

    if (!$version and $api >= $PANDOC_API_MIN) {
        $PANDOC_BIN_MIN;
    } else {
        return $version;
    }
}

sub _minimum_pandoc_api_for {
    my $version = _as_pandoc_version(shift);
    return if @$version <= 1; # require major.minor

    foreach (grep { $_ % 2} 0 .. @REQUIRED_API) { # 1,3,...
        if ($version->match($REQUIRED_API[$_-1]) ) {
            return $REQUIRED_API[$_];
        }
    }

    # required version is newer than any known version
    # return the latest known api version and hope it has not changed
    if ($version > $REQUIRED_API[0]) {
        return $REQUIRED_API[1];
    }

    return $version >= $PANDOC_BIN_MIN ? $PANDOC_API_MIN : undef;
}

our %ELEMENTS = (

    # BLOCK ELEMENTS
    Plain          => [ Block => 'content' ],
    Para           => [ Block => 'content' ],
    CodeBlock      => [ Block => qw(attr content) ],
    RawBlock       => [ Block => qw(format content) ],
    BlockQuote     => [ Block => 'content' ],
    OrderedList    => [ Block => qw(attr content/items) ],
    BulletList     => [ Block => 'content/items' ],
    DefinitionList => [ Block => 'content/items:[DefinitionPair]' ],
    Header         => [ Block => qw(level attr content) ],
    HorizontalRule => ['Block'],
    Table          => [ Block => qw(caption alignment widths headers rows) ],
    Div            => [ Block => qw(attr content) ],
    Null           => ['Block'],
    LineBlock      => [ Block => qw(content) ],

    # INLINE ELEMENTS
    Str         => [ Inline => 'content' ],
    Emph        => [ Inline => 'content' ],
    Strong      => [ Inline => 'content' ],
    Strikeout   => [ Inline => 'content' ],
    Superscript => [ Inline => 'content' ],
    Subscript   => [ Inline => 'content' ],
    SmallCaps   => [ Inline => 'content' ],
    Quoted      => [ Inline => qw(type content) ],
    Cite        => [ Inline => qw(citations:[Citation] content) ],
    Code        => [ Inline => qw(attr content) ],
    Space       => ['Inline'],
    SoftBreak   => ['Inline'],
    LineBreak   => ['Inline'],
    Math        => [ Inline => qw(type content) ],
    RawInline   => [ Inline => qw(format content) ],
    Link        => [ Inline => qw(attr content target) ],
    Image       => [ Inline => qw(attr content target) ],
    Note        => [ Inline => 'content' ],
    Span        => [ Inline => qw(attr content) ],

    # METADATA ELEMENTS
    MetaBool    => [ Meta => 'content' ],
    MetaString  => [ Meta => 'content' ],
    MetaMap     => [ Meta => 'content' ],
    MetaInlines => [ Meta => 'content' ],
    MetaList    => [ Meta => 'content' ],
    MetaBlocks  => [ Meta => 'content' ],

    # TYPE KEYWORDS
    map { $_ => ['Keyword'] }
    qw(DefaultDelim Period OneParen TwoParens SingleQuote DoubleQuote
    DisplayMath InlineMath AuthorInText SuppressAuthor NormalCitation
    AlignLeft AlignRight AlignCenter AlignDefault DefaultStyle Example
    Decimal LowerRoman UpperRoman LowerAlpha UpperAlpha)
);

use parent 'Exporter';
our @EXPORT = (
    keys %ELEMENTS,
    qw(Document attributes metadata citation pandoc_version pandoc_json pandoc_query)
);
our @EXPORT_OK = ( @EXPORT, 'element' );

# create constructor functions
foreach my $name ( keys %ELEMENTS ) {
    no strict 'refs';    ## no critic

    my ( $parent, @accessors ) = @{ $ELEMENTS{$name} };
    my $numargs = scalar @accessors;
    my @parents = map { "Pandoc::Document::$_" } ($parent);
    $parent = join ' ', map { "Pandoc::Document::$_" } $parent,
      map { 'AttributesRole' } grep { $_ eq 'attr' } @accessors;

    ## no critic (ProhibitStringyEval)
    eval "package Pandoc::Document::$name; our \@ISA = qw($parent);";

    *{ __PACKAGE__ . "::$name" } = Scalar::Util::set_prototype(
        sub {
            croak "$name expects $numargs arguments, but given " . scalar @_
              if @_ != $numargs;
            my $self = bless {
                t => $name,
                c => ( @_ == 1 ? $_[0] : [@_] )
            }, "Pandoc::Document::$name";
            $self->set_content(@_);
            $self;
        },
        '$' x $numargs
    );

    for ( my $i = 0 ; $i < @accessors ; $i++ ) {
        my $member = @accessors == 1 ? "\$e->{c}" : "\$e->{c}->[$i]";
        my $code = "my \$e = shift; $member = ( 1 == \@_ ? \$_[0] : [\@_] ) if \@_; return";
        # auto-bless on access via accessor (TODO: move to constructor?)
        $code .= $accessors[$i] =~ s/:\[(.+)\]$//
        ? " [ map { bless \$_, 'Pandoc::Document::$1' } \@{$member} ];"
        : " $member;";
        for ( split '/', $accessors[$i] ) {
            ## no critic
            *{"Pandoc::Document::${name}::$_"} = eval "sub { $code }";
        }
    }
}

sub element {
    my $name = shift;
    no strict 'refs'; #
    croak "undefined element" unless defined $name;
    croak "unknown element $name" unless $ELEMENTS{$name};
    &$name(@_);
}

sub Document {

    my $from_json;
    my $arg = do {
        if ( @_ == 1 ) {
            $from_json = 1;
            my $reftype = reftype $_[0] // '';
            if ( $reftype eq 'ARRAY') {
                # old JSON format
                {
                    meta => $_[0]->[0]->{unMeta},
                    blocks => $_[0]->[1],
                    api_version => 1.16,
                }
            } elsif ( $reftype eq 'HASH' ) {
                $_[0]
            } else {
                croak 'Document: expect array or hash reference'
            }
        } elsif ( @_ == 2 ) {
            # \%meta, \@blocks
            { meta => $_[0], blocks => $_[1] }
        } elsif ( @_ % 2 ) {
            # odd number of args
            croak "Document: too many or ambiguous arguments";
        } else {
            # even number of args: api_version as named parameter
            { meta => shift, blocks => shift, @_ }
        }
    };

    # prefer haskell-style key but accept perl-style and abbreviated key
    my $api_version = $arg->{'pandoc-api-version'}
        // $arg->{pandoc_api_version}
        // $arg->{api_version};

    # We copy values here because $arg may not be a pure AST representation
    my $doc = bless { blocks => ( $arg->{blocks} // [] ) }, 'Pandoc::Document';

    # unblessed metadata in internal format can only come from JSON
    my $meta = $arg->{meta} // {};
    if ($from_json) {
        croak "Document metadata must be a hash" unless 'HASH' eq reftype $meta;
        $doc->{meta} = bless {
            map { $_ => _bless_pandoc_element( $meta->{$_} ) } keys %$meta
        }, 'Pandoc::Document::Metadata';
    } else {
        # otherwise allow user-friendly upgrade via 'metadata' function
        $doc->meta($meta)
    }

    if (!defined $api_version and defined $arg->{pandoc_version}) {
        $doc->pandoc_version($arg->{pandoc_version});
    } else {
        $doc->api_version($api_version // $REQUIRED_API[1]);
    }

    walk $doc, \&_bless_pandoc_element;

    return $doc;

}

# internal helper method
sub _bless_pandoc_element {
    my $e = shift;
    return $e unless ref $e;
    return $e if blessed $e and $e->isa('Pandoc::Document::Element');

    # TODO: run recursively via set_content (don't require 'walk')
    if ( 'MetaMap' eq $e->{t} ) {
        for my $v ( values %{ $e->{c} } ) {
            _bless_pandoc_element( $v );
        }
    }

    bless $e, 'Pandoc::Document::' . $e->{t};
    $e->upgrade($e) if $e->can('upgrade');
    return $e;
}


# specific accessors

sub Pandoc::Document::DefinitionPair::term        { $_[0]->[0] }
sub Pandoc::Document::DefinitionPair::definitions { $_[0]->[1] }

# additional functions

sub attributes($) {

    my $e = Span(['',[],[]],[]); # to make use of AttributesRole
    $e->keyvals(@_);

    return $e->attr;
}

sub citation($) { Pandoc::Document::Citation->new( @_ ) }

# XXX: must require rather than use Pandoc::Metadata
# or its attempt to use Pandoc::Elements will result in a broken state.
require Pandoc::Metadata;

sub metadata($);  ## no critic

sub metadata($) { ## no critic
    my $value = shift;
    if ( !ref $value ) {
        MetaString($value // '')
    }
    elsif ( JSON::is_bool($value) ) {
        MetaBool($value)
    }
    elsif ( blessed($value) ) {
        if ( $value->can('is_meta') and $value->is_meta ) {
            $value
        }
        elsif ( $value->can('is_inline') and $value->is_inline ) {
            MetaInlines([ $value ])
        }
        elsif ( $value->can('is_block') and $value->is_block ) {
            MetaBlocks([ $value ])
        } elsif ( $value->isa('Pandoc::Document::Metadata') ) {
            MetaMap( { map { $_ => $value->{$_} } keys %$value } )
        } else {
            MetaString("$value")
        }
    }
    elsif ( reftype $value eq 'ARRAY' ) {
        MetaList( [ map { metadata $_ } @$value ] )
    }
    elsif ( reftype $value eq 'HASH' ) {
        MetaMap( { map { $_ => metadata $value->{$_} } keys %$value } )
    }
    else {
        MetaString("$value")
    }
}

sub pandoc_json($) {
    shift if $_[0] =~ /^Pandoc::/;

    my $ast = eval { decode_json( $_[0] ) };
    if ($@) {
        $@ =~ s/ at [^ ]+Elements\.pm line \d+//;
        chomp $@;
        croak $@;
    }
    return Document $ast;
}

*pandoc_query = *Pandoc::Walker::query;

# document element packages

{

    package Pandoc::Document;
    use strict;
    use Carp 'croak';
    use Scalar::Util qw(blessed reftype);
    use Pandoc;
    our $VERSION = '0.04';
    our @ISA = ('Pandoc::Document::Element');
    sub blocks;
    sub name { 'Document' }
    sub meta {
        if (@_ > 1) {
            croak "document metadata must be a hash"
                unless 'HASH' eq reftype $_[1];
            my $map = Pandoc::Elements::metadata($_[1])->content;
            $_[0]->{meta} = bless $map, 'Pandoc::Document::Metadata';
        }
        $_[0]->{meta};
    }
    sub content {
        $_[0]->{blocks} = $_[1] if @_ > 1;
        $_[0]->{blocks};
    }
    *blocks = \&content;
    sub is_document { 1 }
    sub as_block {
        bless { t => 'Div', c => [ {}, $_[0]->{blocks} ] }, 'Pandoc::Document::Div';
    }
    sub value {
        shift->meta->value(@_);
    }
    *metavalue = \&value;
    sub string {
        join '', map { $_->string } @{$_[0]->content}
    }
    sub api_version {
        my $self = shift;
        if ( @_ ) {
            my $version = Pandoc::Version->new(shift);
            croak "api_version must be >= $PANDOC_API_MIN"
                if $version < $PANDOC_API_MIN;
            croak "api_version must have major and minor part"
                if @$version < 2;
            $self->{'pandoc-api-version'} = $version;
        }
        return $self->{'pandoc-api-version'};
    }
    *pandoc_version = \&Pandoc::Elements::pandoc_version;
    sub outline {
        my ($self, $depth) = @_;
        _sections( [@{$self->blocks}], $depth );
    }
    sub _sections {
        my ($list, $depth) = @_;
        my (@blocks, @sections);

        # everything up to the first Header
        while (@$list) {
            if ($list->[0]->name eq 'Header') {
                last if !$depth or $depth >= $list->[0]->level;
            }
            push @blocks, shift @$list;
        }

        # divide into sections
        while (@$list) {
            my $header = shift @$list;
            my $level  = $header->level;

            my @content;
            while (@$list) {
                if ($list->[0]->name eq 'Header') {
                    last if $list->[0]->level <= $level;
                }
                push @content, shift @$list;
            }

            my $s = ($depth and $depth < $level)
                ? { blocks => \@content }
                : _sections(\@content,$depth);
            push @sections, { header => $header, %$s };
        }

        return { blocks => \@blocks, sections => \@sections };
    }
    sub to_pandoc {
        my ($self, @args) = @_;
        my $pandoc = (@args and blessed($args[0]) and $args[0]->isa('Pandoc'))
                   ? shift(@args) : pandoc;

        my $api_version = $self->api_version;   # save
        $self->pandoc_version( $pandoc->version );

        my $in = $self->to_json;
        $self->api_version($api_version);       # restore

        $pandoc->run( [ -f => 'json', @args ], { in => \$in, out => \my $out } );
        return $out;
    }
    foreach my $format (qw(markdown latex html rst plain)) {
        no strict 'refs';
        *{ __PACKAGE__ . "::to_$format" } = sub {
            shift()->to_pandoc( @_, '-t' => $format );
        }
    }
}

{
    package Pandoc::Document::Keyword;
    our @ISA = ('Pandoc::Document::Element');
}

{

    package Pandoc::Document::Element;
    use strict;
    use warnings;
    our $VERSION = $Pandoc::Document::VERSION;
    use JSON ();
    use Scalar::Util qw(reftype blessed);
    use Pandoc::Walker ();
    use Pandoc::Selector;
    use subs qw(walk query transform);    # Silence syntax warnings

    sub to_json {
        JSON->new->utf8->canonical->convert_blessed->encode( $_[0] );
    }

    sub TO_JSON {

        # Run everything thru this method so arrays/hashes are cloned
        # and objects without TO_JSON methods are stringified.
        # Required to ensure correct scalar types for Pandoc.

# There is no easy way in Perl to tell if a scalar value is already a string or number,
# so we stringify all scalar values and numify/boolify as needed afterwards.

        my ( $ast, $maybe_blessed ) = @_;
        if ( $maybe_blessed && blessed $ast ) {
            return $ast if $ast->can('TO_JSON');    # JSON.pm will convert
                 # may have overloaded stringification! Should we check?
                 # require overload;
              # return "$ast" if overload::Method($ast, q/""/) or overload::Method($ast, q/0+/);
              # carp "Non-stringifiable object $ast";
            return "$ast";
        }
        elsif ( 'ARRAY' eq reftype $ast ) {
            return [ map { ref($_) ? TO_JSON( $_, 1 ) : "$_"; } @$ast ];
        }
        elsif ( 'HASH' eq reftype $ast ) {
            my %ret = %$ast;
            while ( my ( $k, $v ) = each %ret ) {
                $ret{$k} = ref($v) ? TO_JSON( $v, 1 ) : "$v";
            }
            return \%ret;
        }
        else { return "$ast" }
    }

    sub name        { $_[0]->{t} }
    sub content     {
       my $e = shift;
       $e->set_content(@_) if @_;
       $e->{c}
    }
    sub set_content { # TODO: document this
       my $e = shift;
       $e->{c} = @_ == 1 ? $_[0] : [@_]
    }
    sub is_document { 0 }
    sub is_block    { 0 }
    sub is_inline   { 0 }
    sub is_meta     { 0 }
    sub as_block    {
        bless { t => 'Null', c => [] }, 'Pandoc::Document::Null';
    }
    *walk      = *Pandoc::Walker::walk;
    *query     = *Pandoc::Walker::query;
    *transform = *Pandoc::Walker::transform;

    sub string {

        # TODO: fix issue #4 to avoid this duplication
        if ( $_[0]->name =~ /^(Str|Code|CodeBlock|Math|MetaString)$/ ) {
            return $_[0]->content;
        }
        elsif ( $_[0]->name =~ /^(LineBreak|SoftBreak|Space)$/ ) {
            return ' ';
        }
        join '', @{
            $_[0]->query(
                {
                    'Str|Code|CodeBlock|Math|MetaString'  => sub { $_->content },
                    'LineBreak|Space|SoftBreak' => sub { ' ' },
                }
            );
        };
    }

    sub match {
        my $self = shift;
        my $selector = blessed $_[0] ? shift : Pandoc::Selector->new(shift);
        $selector->match($self);
    }
}

{

    package Pandoc::Document::AttributesRole;
    use Hash::MultiValue;
    use Scalar::Util qw(reftype blessed);
    use Carp qw(croak);

    sub id {
        $_[0]->attr->[0] = defined $_[1] ? "$_[1]" : "" if @_ > 1;
        $_[0]->attr->[0]
    }

    sub classes {
        my $e = shift;
        croak 'Method classes() is not a setter' if @_;
        warn "->classes is deprecated. Use [ split ' ', \$e->class ] instead\n";
        $e->attr->[1]
    }

    sub class {
        my $e = shift;
        if (@_) {
            $e->attr->[1] = [
                grep { $_ ne '' }
                map { split qr/\s+/, $_ }
                map { (ref $_ and reftype $_ eq 'ARRAY') ? @$_ : $_ }
                @_
            ];
        }
        join ' ', @{$e->attr->[1]}
    }

    sub add_attribute {
        my ($e, $key, $value) = @_;
        if ($key eq 'id') {
            $e->id($value);
        } elsif ($key eq 'class') {
            $value //= '';
            $value = ["$value"] unless (reftype $value // '') eq 'ARRAY';
            push @{$e->attr->[1]}, grep { $_ ne '' } map { split qr/\s+/, $_ } @$value;
        } else {
            push @{$e->attr->[2]}, [ $key, "$value" ];
        }
    }

    sub keyvals {
        my $e = shift;
        if (@_) {
            my $attrs = @_ == 1 ? shift : Hash::MultiValue->new(@_);
            unless (blessed $attrs and $attrs->isa('Hash::MultiValue')) {
                $attrs = Hash::MultiValue->new(%$attrs);
            }
            $e->attr->[1] = [] if exists $attrs->{class};
            $e->attr->[2] = [];
            $attrs->each(sub { $e->add_attribute(@_) });
        }
        my @h;
        push @h, id => $e->id if $e->id ne '';
        push @h, class => $e->class if @{$e->attr->[1]};
        Hash::MultiValue->new( @h, map { @$_ } @{$e->attr->[2]} );
    }

}

{

    package Pandoc::Document::Block;
    our $VERSION = $PANDOC::Document::VERSION;
    our @ISA     = ('Pandoc::Document::Element');
    sub is_block { 1 }
    sub as_block { $_[0] }
    sub null { # TODO: document this (?)
        %{$_[0]} = (t => 'Null', c => []);
        bless $_[0], 'Pandoc::Document::Null';
    }
}

{

    package Pandoc::Document::Inline;
    our $VERSION = $PANDOC::Document::VERSION;
    our @ISA     = ('Pandoc::Document::Element');
    sub is_inline { 1 }
    sub as_block {
        bless { t => 'Plain', c => [ $_[0] ] }, 'Pandoc::Document::Plain';
    }
}

{

    package Pandoc::Document::LinkageRole;
    our $VERSION = $PANDOC::Document::VERSION;

    for my $Element (qw[ Link Image ]) {
        no strict 'refs';    #no critic
        unshift @{"Pandoc::Document::${Element}::ISA"}, __PACKAGE__; # no critic
    }

    sub url   { my $e = shift; $e->{c}->[-1][0] = shift if @_; return $e->{c}->[-1][0] //= ""; }
    sub title { my $e = shift; $e->{c}->[-1][1] = shift if @_; return $e->{c}->[-1][1] //= ""; }

    sub upgrade {
        # prepend attributes to old-style ast
        unshift @{ $_[0]->{c} }, [ "", [], [] ]
            if 2 == @{ $_[0]->{c} };
    }
}

{
    package Pandoc::Document::Citation;
    our $VERSION = $PANDOC::Document::VERSION;

    use Carp qw[ carp croak ];

    my %props = (
        id     => { key => 'citationId',      default => '"missing"' },
        prefix => { key => 'citationPrefix',  default => '[]' },
        suffix => { key => 'citationSuffix',  default => '[]' },
        num    => { key => 'citationNoteNum', default => '0' },
        hash   => { key => 'citationHash',    default => '1' },
        mode   => {
            key     => 'citationMode',
            default => q{
            bless(
                { t => 'NormalCitation', c => [] },
                'Pandoc::Document::NormalCitation'
              )
        },
        },
    );

    {
        my $template = <<'END_OF_TEMPLATE';
#line 1 "method [[[method]]]()"
package Pandoc::Document::Citation;
sub [[[method]]] {
    my $self = shift;
    if ( @_ ) {
        $self->{[[[key]]]} = [[[coerce]]] ( shift // [[[default]]] );
    }
    return [[[coerce]]] ( $self->{[[[key]]]} //= [[[default]]] );
}
no warnings 'once';
*[[[alias]]] = \&[[[method]]];
1;
END_OF_TEMPLATE

        while ( my ( $name, $prop ) = each %props ) {
            $prop->{name}   = $name;
            $prop->{method} = "Pandoc::Document::Citation::$name";
            $prop->{alias}  = "Pandoc::Document::Citation::$prop->{key}";
            $prop->{coerce}
              = $prop->{default} =~ /^\d$/ ? '0 +'
              : $prop->{default} =~ /^"/   ? '"" .'
              :                              "";
            {
                ( my $source = $template ) =~ s/\Q[[[\E(\w+)\Q]]]\E/$prop->{$1}/g;
                local $@;
                ## no critic
                eval $source || croak $@ . $source;
            }
        }
    }

    my %accessors = map { ; $_->{name} => $_->{key} } values %props;

    sub new {
        my ( $class, $arg ) = @_;
        my $self = bless {}, $class;
        while ( my ( $name, $key ) = each %accessors ) {
            # coerce on access
            $self->$name( $arg->{$key} // $arg->{$name} );
        }
        return $self;
    }

    no warnings 'once';
    *TO_JSON = \&Pandoc::Document::Element::TO_JSON;
}

# Special TO_JSON methods to coerce data to int/number/Boolean as appropriate
# and to downgrade document model depending on pandoc_version

sub Pandoc::Document::to_json {
    my ($self) = @_;

    local $Pandoc::Elements::PANDOC_VERSION =
        $Pandoc::Elements::PANDOC_VERSION // $self->pandoc_version;

    return Pandoc::Document::Element::to_json( $self->TO_JSON );
}

sub Pandoc::Document::TO_JSON {
    my ( $self ) = @_;
    return $self->api_version >= 1.17
        ? { %$self }
        : [ { unMeta => $self->{meta} }, $self->{blocks} ]
}

sub Pandoc::Document::SoftBreak::TO_JSON {
    if ( pandoc_version() < '1.16' ) {
        return { t => 'Space', c => [] };
    } else {
        return { t => 'SoftBreak', c => [] };
    }
}

sub Pandoc::Document::LinkageRole::TO_JSON {
    my $ast = Pandoc::Document::Element::TO_JSON( $_[0] );
    if ( pandoc_version() < 1.16 ) {
        # remove attributes
        $ast->{c} = [ @{ $ast->{c} }[ 1, 2 ] ];
    }
    return $ast;
}

sub Pandoc::Document::Header::TO_JSON {
    my $ast = Pandoc::Document::Element::TO_JSON( $_[0] );

    # coerce heading level to int
    $ast->{c}[0] = int( $ast->{c}[0] );
    return $ast;
}

sub Pandoc::Document::OrderedList::TO_JSON {
    my $ast = Pandoc::Document::Element::TO_JSON( $_[0] );

    # coerce first item number to int
    $ast->{c}[0][0] = int( $ast->{c}[0][0] );
    return $ast;
}

sub Pandoc::Document::Table::TO_JSON {
    my $ast = Pandoc::Document::Element::TO_JSON( $_[0] );

    # coerce column widths to numbers (floats)
    $_ += 0 for @{ $ast->{c}[2] };    # faster than map
    return $ast;
}

sub Pandoc::Document::Cite::TO_JSON {
    my $ast = Pandoc::Document::Element::TO_JSON( $_[0] );
    for my $citation ( @{ $ast->{c}[0] } ) {
        for my $key (qw[ citationHash citationNoteNum ]) {

            # coerce to int
            $citation->{$key} = int( $citation->{$key} );
        }
    }
    return $ast;
}

sub Pandoc::Document::LineBlock::TO_JSON {
    my $ast     = Pandoc::Document::Element::TO_JSON( $_[0] );
    my $content = $ast->{c};

    for my $line ( @$content ) {

        # Convert spaces at the beginning of each line
        # to Unicode non-breaking spaces, because pandoc does.
        next unless @$line and $line->[0]->{t} eq 'Str';
        $line->[0]->{c} =~ s{^(\x{20}+)}{ "\x{a0}" x length($1) }e;
    }

    return $ast if pandoc_version() >= 1.18;

    my $c = [ map { ; @$_, LineBreak() } @{$content} ];
    pop @$c;    # remove trailing line break
    return Para( $c )->TO_JSON;
}

1;
__END__

=encoding utf-8

=head1 NAME

Pandoc::Elements - create and process Pandoc documents

=begin markdown

# STATUS

[![Unix Build Status](https://travis-ci.org/nichtich/Pandoc-Elements.svg)](https://travis-ci.org/nichtich/Pandoc-Elements)
[![Windows Build status](https://ci.appveyor.com/api/projects/status/pb7vdd14xml9ho43?svg=true)](https://ci.appveyor.com/project/nichtich/pandoc-elements)
[![Coverage Status](https://coveralls.io/repos/nichtich/Pandoc-Elements/badge.svg)](https://coveralls.io/r/nichtich/Pandoc-Elements)
[![Kwalitee Score](http://cpants.cpanauthors.org/dist/Pandoc-Elements.png)](http://cpants.cpanauthors.org/dist/Pandoc-Elements)
[![Code Climate Issue Count](https://codeclimate.com/github/nichtich/Pandoc-Elements/badges/issue_count.svg)](https://codeclimate.com/github/nichtich/Pandoc-Elements)

=end markdown

=head1 SYNOPSIS

The output of this script C<hello.pl>

    use Pandoc::Elements;
    use JSON;

    print Document(
        {
            title => MetaInlines [ Str "Greeting" ]
        },
        [
            Header( 1, attributes { id => 'top' }, [ Str 'Hello' ] ),
            Para [ Str 'Hello, world!' ],
        ],
        api_version => '1.17.0.4'
    )->to_json;

can be converted for instance to HTML via

    ./hello.pl | pandoc -f json -t html5 --standalone

an equivalent Pandoc Markdown document would be

    % Greeting
    # Gruß {.de}
    Hello, world!

=head1 DESCRIPTION

Pandoc::Elements provides utility functions to parse, serialize, and modify
abstract syntax trees (AST) of L<Pandoc|http://pandoc.org/> documents. Pandoc
can convert this data structure to many other document formats, such as HTML,
LaTeX, ODT, and ePUB.

See also module L<Pandoc::Filter>, command line script L<pod2pandoc>, and the
internal modules L<Pandoc::Walker> and L<Pod::Simple::Pandoc>.

=head1 PANDOC VERSIONS

The Pandoc document model is defined in file
L<Text.Pandoc.Definition|https://hackage.haskell.org/package/pandoc-types/docs/Text-Pandoc-Definition.html>
as part of Haskell package
L<pandoc-types|https://hackage.haskell.org/package/pandoc-types>.

Pandoc::Elements is compatible with pandoc-types 1.12.3 (released with pandoc
1.12.1) up to I<at least> pandoc-types-1.17.0.4 (first releases with pandoc
1.18). JSON output of all pandoc releases since 1.12.1 can be parsed with
function C<pandoc_json>, the L</Document> constructor or method C<parse> of
module L<Pandoc>. The AST is always upgraded to pandoc-types 1.17 and
downgraded to another api version on serialization with C<to_json>.

To determine the api version required by a version of pandoc executable since
version 1.18 execute pandoc with the C<--version> option and check which
version of the C<pandoc-types> library pandoc was compiled with.

Beginning with version 1.18 pandoc will not decode a JSON AST representation
unless the major and minor version numbers (Document method C<api_version>)
match those built into that version of pandoc. The following changes in pandoc
document model have been implemented:

=over

=item

pandoc-types 1.17, released for pandoc 1.18, introduced the
L<LineBlock|/LineBlock> element and modified representation
of the root L<Document|/Document> element.

=item

pandoc-types 1.16, released with pandoc 1.16, introduced attributes to L<Link|/Link> and L<Image|/Image> elements

=item

pandoc-types 1.12.3, released with pandoc 1.12.1, modified the representation
of elements to objects with field C<t> and C<c>. This is also the internal
representation of documents used in this module.

=back

=head1 FUNCTIONS

The following functions and keywords are exported by default:

=over

=item

Constructors for all Pandoc document element (L<block elements|/BLOCK ELEMENTS>
such as C<Para> and L<inline elements|/INLINE ELEMENTS> such as C<Emph>,
L<metadata elements|/METADATA ELEMENTS> and the L<Document|/DOCUMENT ELEMENT>).

=item

L<Type keywords|/TYPE KEYWORDS> such as C<Decimal> and C<LowerAlpha> to be used
as types in other document elements.

=item

The following helper functions C<pandoc_json>, C<pandoc_version>,
C<attributes>, C<metadata>, C<citation>, and C<element>.

=back

=head2 pandoc_json $json

Parse a JSON string, as emitted by pandoc in JSON format. This is the reverse
to method C<to_json> but it can read both old (before Pandoc 1.16) and new
format.

=head2 attributes { key => $value, ... }

Maps a hash reference or instance of L<Hash::MultiValue> into the internal
structure of Pandoc attributes. The special keys C<id> (string), and C<class>
(string or array reference with space-separated class names) are recognized.
See L<attribute methods|/ATTRIBUTE METHODS> for details.

=head2 citation { ... }

A citation as part of document element L<Cite|/Cite> must be a hash reference
with fields C<citationId> (string), C<citationPrefix> (list of L<inline
elements|/INLINE ELEMENTS>) C<citationSuffix> (list of L<inline
elements|/INLINE ELEMENTS>), C<citationMode> (one of C<NormalCitation>,
C<AuthorInText>, C<SuppressAuthor>), C<citationNoteNum> (integer), and
C<citationHash> (integer). The helper method C<citation> can be used to
construct such a hash by filling in default values and optionally using 
shorter field names (C<id>, C<prefix>, C<suffix>, C<mode>, C<note>, and C<hash>):

    citation {
        id => 'foo',
        prefix => [ Str "see" ],
        suffix => [ Str "p.", Space, Str "42" ]
    }

    # in Pandoc Markdown

    [see @foo p. 42]

The values returned by this function, as well as any citations contained in
document objects returned by C<pandoc_json>, are blessed hashrefs with
getter/setter accessors corresponding to both the full and short field
names, so that e.g. C<< $citation->citationId(...) >> and
C<< $citation->id(...) >> get or set the same value.

=head2 pandoc_version( [ $document ] )

Return a L<Pandoc::Version> object with expected version number of pandoc
executable to be used for serializing documents with L<to_json|/to_json>.

If a L<Document element|/DOCUMENT ELEMENT> is given as argument, the minimal
pandoc release version compatible with its api version is returned.

Without argument, package variable C<$PANDOC_VERSION> is checked for a
preferred pandoc release. By default this variable is set from an environment
variable of same name. If no preferred pandoc release has been specified, the
function returns version 1.18 because this is the first pandoc release
compatible with most recent api version supported by this module.

See also method C<version> of module L<Pandoc> to get the current version of
pandoc executable on your system.

=head2 element( $name => $content )

Create a Pandoc document element of arbitrary name. This function is only
exported on request.

=head1 ELEMENTS AND METHODS

Document elements are encoded as Perl data structures equivalent to the JSON
structure, emitted with pandoc output format C<json>. This JSON structure is
subject to minor changes between L<versions of pandoc|/pandoc_version>.  All
elements are blessed objects that provide L<common element methods|/COMMON
METHODS> (all elements), L<attribute methods|/ATTRIBUTE METHODS> (elements with
attributes), and additional element-specific methods.

=head2 COMMON METHODS

=head3 to_json

Return the element as JSON encoded string. The following are equivalent:

    $element->to_json;
    JSON->new->utf8->canonical->convert_blessed->encode($element);

The serialization format can be adjusted to different L<pandoc versions|/PANDOC
VERSIONS> with module and environment variable C<PANDOC_VERSION> or with
Document element properties C<api_version> and C<pandoc_version>.

When writing filters you can normally just rely on the api version value
obtained from pandoc, since pandoc expects to receive the same JSON format as
it emits.

=head3 name

Return the name of the element, e.g. "Para" for a L<paragraph element|/Para>.

=head3 content

Return the element content. For most elements (L<Para|/Para>, L<Emph|/Emph>,
L<Str|/Str>...) the content is an array reference with child elements. Other
elements consist of multiple parts; for instance the L<Link|/Link> element has
attributes (C<attr>, C<id>, C<class>, C<classes>, C<keyvals>) a link text
(C<content>) and a link target (C<target>) with C<url> and C<title>.

=head3 is_block

True if the element is a L<Block element|/BLOCK ELEMENTS>

=head3 is_inline

True if the element is an inline L<Inline element|/INLINE ELEMENTS>

=head3 is_meta

True if the element is a L<Metadata element|/METADATA ELEMENTS>

=head3 is_document

True if the element is a L<Document element|/DOCUMENT ELEMENT>

=head3 as_block

Return the element unmodified if it is a block element or wrapped in a
L<Plain|/Plain> or L<Div|/Div> otherwise.

=head3 match( $selector )

Check whether the element matches a given L<Pandoc::Selector> (given as
instance or string).

=head3 walk(...)

Walk the element tree with L<Pandoc::Walker>

=head3 query(...)

Query the element to extract results with L<Pandoc::Walker>

=head3 transform(...)

Transform the element tree with L<Pandoc::Walker>

=head3 string

Returns a concatenated string of element content, leaving out all formatting.

=head2 ATTRIBUTE METHODS

Some elements have attributes which can be an identifier, ordered class names
and ordered key-value pairs. Elements with attributes provide the following
methods:

=head3 attr

Get or set the attributes in Pandoc internal structure:

  [ $id, [ @classes ], [ [ key => $value ], ... ] ]

See helper function L<attributes|/attributes-key-value> to create this
structure.

=head3 keyvals

Get all attributes (id, class, and key-value pairs) as new L<Hash::MultiValue>
instance, or replace I<all> key-value pairs plus id and/or class if these are
included as field names. All class fields are split by whitespaces.

  $e->keyvals                           # return new Hash::MultiValue
  $e->keyvals( $HashMultiValue )        # update by instance of Hash::MultiValue
  $e->keyvals( key => $value, ... )     # update by list of key-value pairs
  $e->keyvals( \%hash )                 # update by hash reference
  $e->keyvals( { } )                    # remove all key-value pairs
  $e->keyvals( id => '', class => '' )  # remove all key-value pairs, id, class

=head3 id

Get or set the identifier. See also L<Pandoc::Filter::HeaderIdentifiers> for
utility functions to handle L<Header|/Header> identifiers.

=head3 class

Get or set the list of classes, separated by whitespace.

=head3 add_attribute( $name => $value )

Append an attribute. The special attribute names C<id> and C<class> set or
append identifier or class, respectively.

=head2 DOCUMENT ELEMENT

=head3 Document

Root element, consisting of metadata hash (C<meta>), document element array
(C<content>=C<blocks>) and optional C<api_version>. The constructor accepts
either two arguments and an optional named parameter C<api_version>:

    Document { %meta }, [ @blocks ], api_version => $version_string

or a hash with three fields for metadata, document content, and an optional
pandoc API version:

    {
        meta               => { %metadata },
        blocks             => [ @content ],
        pandoc-api-version => [ $major, $minor, $revision ]
    }

The latter form is used as pandoc JSON format since pandoc release 1.18. If no
api version is given, it will be set 1.17 which was also introduced with pandoc
release 1.18.

A third ("old") form is accepted for compatibility with pandoc JSON format
before release 1.18 and since release 1.12.1: an array with two elements for
metadata and document content respectively.

    [ { unMeta => { %meta } }, [ @blocks ] ]

The api version is set to 1.16 in this case, but older versions down to 1.12.3
used the same format.

Document elements provide the following special methods in addition to
L<common element methods|/COMMON METHODS>:

=over

=item B<api_version( [ $api_version ] )>

Return the pandoc-types version (aka "pandoc-api-version") of this document as
L<Pandoc::Version|Pandoc::Version> object or sets it to a new value. This
version determines how method L<to_json|/to_json> serializes the document.

See L</PANDOC VERSIONS> for details.

=item B<pandoc_version( [ $pandoc_version ] )>

Return the minimum required version of pandoc executable compatible
with the api_version of this document. The following are equivalent:

  $doc->pandoc_version;
  pandoc_version( $doc );

If used as setter, sets the api version of this document to be compatible with
the given pandoc version.

=item B<content> or B<blocks>

Get or set the array of L<block elements|/BLOCK ELEMENTS> of the
document.

=item B<meta( [ $metadata ] )>

Get and/or set combined L<document metadata|Pandoc::Metadata>. Use method
C<value> to get selected metadata fields and values.

=item B<value( [ $pointer ] [ %options ] )>

Get selected document metadata field value(s). See L<Pandoc::Metadata> for
documentation. Can also be called as C<metavalue>, so the following are
equivalent:

  $doc->value( ... );
  $doc->meta->value( ... );
  $doc->metavalue( ... );

=item B<to_pandoc( [ [ $pandoc, ] @arguments ])>

Process the document with L<Pandoc> executable and return its output:

    $doc->to_pandoc( -o => 'doc.html' );
    my $markdown = $doc->to_pandoc( -t => 'markdown' );

The first argument can optionally be an instance of L<Pandoc> to use a specific
executable.

=item B<to_...( [ @arguments ] )>

Process the document into C<markdown> (pandoc's extended Markdown), C<latex>
(LaTeX), C<html> (HTML), C<rst> (reStructuredText), or C<plain> (plain text).
The following are equivalent:

    $doc->to_markdown( @args );
    $doc->to_pandoc( @args, '-t' => 'markdown' );

=item B<outline( [ $depth ] )>

Returns an outline of the document structure based on L<Header|/Header>
elements. The outline is a hierarchical hash reference with the following
fields:

=over

=item header

L<Header|/Header> element (not included at the document root)

=item blocks

List of L<block elements|/Block elements> before the next L<Header|/Header>
element (of given depth or less if a maximum depth was given)

=item sections

List of subsections, each having the same outline structure.

=back

=back

=head2 BLOCK ELEMENTS

=head3 BlockQuote

Block quote, consisting of a list of L<blocks|/BLOCK ELEMENTS> (C<content>)

    BlockQuote [ @blocks ]

=head3 BulletList

Unnumbered list of items (C<content>=C<items>), each a list of
L<blocks|/BLOCK ELEMENTS>

    BulletList [ [ @blocks ] ]

=head3 CodeBlock

Code block (literal string C<content>) with attributes (C<attr>, C<id>,
C<class>, C<classes>, C<keyvals>)

    CodeBlock $attributes, $content

=head3 DefinitionList

Definition list, consisting of a list of pairs (C<content>=C<items>),
each a term (C<term>, a list of L<inlines|/INLINE ELEMENTS>) and one
or more definitions (C<definitions>, a list of L<blocks|/BLOCK ELEMENTS>).

    DefinitionList [ @definitions ]

    # each item in @definitions being a pair of the form

        [ [ @inlines ], [ @blocks ] ]

=head3 Div

Generic container of L<blocks|/BLOCK ELEMENTS> (C<content>) with attributes
(C<attr>, C<id>, C<class>, C<classes>, C<keyvals>).

    Div $attributes, [ @blocks ]

=head3 Header

Header with C<level> (integer), attributes (C<attr>, C<id>, C<class>,
C<classes>, C<keyvals>), and text (C<content>, a list of L<inlines|/INLINE ELEMENTS>).

    Header $level, $attributes, [ @inlines ]

=head3 HorizontalRule

Horizontal rule

    HorizontalRule

=head3 LineBlock

List of lines (C<content>), each a list of L<inlines|/INLINE ELEMENTS>.

    LineBlock [ @lines ]

This element was added in pandoc 1.18. Before it was represented L<Para|/Para>
elements with embedded L<LineBreak|/LineBreak> elements. This old serialization
form can be enabled by setting C<$PANDOC_VERSION> package variable to a lower
version number.

=head3 Null

Nothing

    Null

=head3 OrderedList

Numbered list of items (C<content>=C<items>), each a list of L<blocks|/BLOCK
ELEMENTS>), preceded by list attributes (start number, numbering style, and
delimiter).

    OrderedList [ $start, $style, $delim ], [ [ @blocks ] ]

Supported styles are C<DefaultStyle>, C<Example>, C<Decimal>, C<LowerRoman>,
C<UpperRoman>, C<LowerAlpha>, and C<UpperAlpha>.

Supported delimiters are C<DefaultDelim>, C<Period>, C<OneParen>, and
C<TwoParens>.

=head3 Para

Paragraph, consisting of a list of L<Inline elements|/INLINE ELEMENTS>
(C<content>).

    Para [ $elements ]

=head3 Plain

Plain text, not a paragraph, consisting of a list of L<Inline elements|/INLINE
ELEMENTS> (C<content>).

    Plain [ @inlines ]

=head3 RawBlock

Raw block with C<format> and C<content> string.

    RawBlock $format, $content

=head3 Table

Table, with C<caption>, column C<alignments>, relative column C<widths> (0 =
default), column C<headers> (each a list of L<blocks|/BLOCK ELEMENTS>), and
C<rows> (each a list of lists of L<blocks|/BLOCK ELEMENTS>).

    Table [ @inlines ], [ @alignments ], [ @width ], [ @headers ], [ @rows ]

Possible alignments are C<AlignLeft>, C<AlignRight>, C<AlignCenter>, and
C<AlignDefault>.

An example:

    Table [Str "Example"], [AlignLeft,AlignRight], [0.0,0.0],
     [[Plain [Str "name"]]
     ,[Plain [Str "number"]]],
     [[[Plain [Str "Alice"]]
      ,[Plain [Str "42"]]]
     ,[[Plain [Str "Bob"]]
      ,[Plain [Str "23"]]]];

=head2 INLINE ELEMENTS

=head3 Cite

Citation, a list of C<citations> and a list of L<inlines|/INLINE ELEMENTS>
(C<content>). See helper function L<citation|/citation> to construct
citations.

    Cite [ @citations ], [ @inlines ]

=head3 Code

Inline code, a literal string (C<content>) with attributes (C<attr>, C<id>,
C<class>, C<classes>, C<keyvals>)

    Code attributes { %attr }, $content

=head3 Emph

Emphasized text, a list of L<inlines|/INLINE ELEMENTS> (C<content>).

    Emph [ @inlines ]

=head3 Image

Image with alt text (C<content>, a list of L<inlines|/INLINE ELEMENTS>) and
C<target> (list of C<url> and C<title>) with attributes (C<attr>, C<id>,
C<class>, C<classes>, C<keyvals>).

    Image attributes { %attr }, [ @inlines ], [ $url, $title ]

Serializing the attributes is disabled in api version less then 1.16.

=head3 LineBreak

Hard line break

    LineBreak

=head3 Link

Hyperlink with link text (C<content>, a list of L<inlines|/INLINE ELEMENTS>)
and C<target> (list of C<url> and C<title>) with attributes (C<attr>, C<id>,
C<class>, C<classes>, C<keyvals>).

    Link attributes { %attr }, [ @inlines ], [ $url, $title ]

Serializing the attributes is disabled in api version less then 1.16.

=head3 Math

TeX math, given as literal string (C<content>) with C<type> (one of
C<DisplayMath> and C<InlineMath>).

    Math $type, $content

=head3 Note

Footnote or Endnote, a list of L<blocks|/BLOCK ELEMENTS> (C<content>).

    Note [ @blocks ]

=head3 Quoted

Quoted text with quote C<type> (one of C<SingleQuote> and C<DoubleQuote>) and a
list of L<inlines|/INLINE ELEMENTS> (C<content>).

    Quoted $type, [ @inlines ]

=head3 RawInline

Raw inline with C<format> (a string) and C<content> (a string).

    RawInline $format, $content

=head3 SmallCaps

Small caps text, a list of L<inlines|/INLINE ELEMENTS> (C<content>).

    SmallCaps [ @inlines ]

=head3 SoftBreak

Soft line break

    SoftBreak

This element was added in pandoc 1.16 as a matter of editing convenience to
preserve line breaks (as opposed to paragraph breaks) from input source to
output. If you are going to feed a document containing C<SoftBreak> elements to
Pandoc E<lt> 1.16 you will have to set the package variable or environment
variable C<PANDOC_VERSION> to 1.15 or below.

=head3 Space

Inter-word space

    Space

=head3 Span

Generic container of L<inlines|/INLINE ELEMENTS> (C<content>) with attributes
(C<attr>, C<id>, C<class>, C<classes>, C<keyvals>).

    Span attributes { %attr }, [ @inlines ]

=head3 Str

Plain text, a string (C<content>).

    Str $content

=head3 Strikeout

Strikeout text, a list of L<inlines|/INLINE ELEMENTS> (C<content>).

    Strikeout [ @inlines ]

=head3 Strong

Strongly emphasized text, a list of L<inlines|/INLINE ELEMENTS> (C<content>).

    Strong [ @inlines ]

=head3 Subscript

Subscripted text, a list of L<inlines|/INLINE ELEMENTS> (C<content>).

    Supscript [ @inlines ]

=head3 Superscript

Superscripted text, a list of L<inlines|/INLINE ELEMENTS> (C<content>).

    Superscript [ @inlines ]

=head2 METADATA ELEMENTS

See L<Pandoc::Metadata> for documentation of metadata elements C<MetaBool>,
C<MetaString>, C<MetaMap>, C<MetaInlines>, C<MetaList>, and C<MetaBlocks>.

Helper function C<metadata> can be used to convert scalars, hash references,
array references, and Pandoc Inline/Block elements into metadata elements.

=head2 TYPE KEYWORDS

The following document elements are only as used as type keywords in other
document elements:

=over

=item

C<SingleQuote>, C<DoubleQuote>

=item

C<DisplayMath>, C<InlineMath>

=item

C<AuthorInText>, C<SuppressAuthor>, C<NormalCitation>

=item

C<AlignLeft>, C<AlignRight>, C<AlignCenter>, C<AlignDefault>

=item

C<DefaultStyle>, C<Example>, C<Decimal>, C<LowerRoman>, C<UpperRoman>,
C<LowerAlpha>, C<UpperAlpha>

=item

C<DefaultDelim>, C<Period>, C<OneParen>, C<TwoParens>

=back

=head1 SEE ALSO

Perl module L<Pandoc> implements a wrapper around the pandoc executable.

Similar libraries in other programming languages are listed at L<https://github.com/jgm/pandoc/wiki/Pandoc-wrappers-and-interfaces>.

=head1 AUTHOR

Jakob Voß E<lt>jakob.voss@gbv.deE<gt>

=head1 CONTRIBUTORS

Benct Philip Jonsson E<lt>bpjonsson@gmail.comE<gt>

L<TakeAsk|https://github.com/TakeAsh>

=head1 COPYRIGHT AND LICENSE

Copyright 2014- Jakob Voß

GNU General Public License, Version 2

This module is heavily based on Pandoc by John MacFarlane.

=cut