lib/Catmandu/Exporter/RTF.pm
package Catmandu::Exporter::RTF;
use Catmandu::Sane;
use Catmandu;
use Moo;
with 'Catmandu::Exporter';
has host => (is => 'lazy');
has links => (is => 'ro', default => sub { });
has name => (is => 'ro', default => sub {'LibreCat'});
has style => (is => 'lazy');
my $HEADER = <<EOF;
{\\rtf1\\ansi\\deff0\\adeflang1025
{\\fonttbl{\\f0 Arial;}}
\\f0\\fs22
EOF
sub BUILD {
$_[0]->{_buf} = $HEADER;
}
sub _build_host {
my ($self) = @_;
state $host = Catmandu->config->{uri_base};
}
sub _build_style {
my ($self) = @_;
state $style = do {
grep($self->style,
keys %{Catmandu->config->{citation}->{csl}->{styles}})
? $self->style
: Catmandu->config->{citation}->{csl}->{default_style};
};
}
my $FOOTER = <<EOF;
}
EOF
# all those hexadecimal characters longer than 2 (\\' in rtf will treat the following two characters as hex - but only two!)
my $HEXMAP = {
"152" => "OE"
, # latin capital letter OE - substituted by capital letters O and E
"153" => "oe"
, # latin small letter oe - substituted by small letters o and e
"160" => "S"
, # latin capital letter S with caron - substituted by capital etter S
"161" => "s"
, # latin small letter s with caron - substituted by small letter s
"178" => "y"
, # latin capital letter Y with diaeresis - substituted by capital letter Y
"192" => "f"
, # latin small f with hook, function - substituted by small letter f
"2013" => "\\endash ",
"2014" => "\\emdash ",
"2018" => "\\lquote ",
"2019" => "\\rquote ",
#"201A" => # single low-9 quotation mark
"201C" => "\\ldblquote ",
"201D" => "\\rdblquote ",
"8220" => "\\ldblquote ",
"8221" => "\\rdblquote ",
#"201E" => # double low-9 quotation mark
#"2020" => # dagger
#"2021" => # double dagger
"2022" => "\\bullet ",
#"2026" => # horizontal ellipsis
#"2030" => # per thousand sign
#"20AC" => # euro sign
#"2122" => #trade mark sign
};
sub add {
my ($self, $pub) = @_;
$self->_add_citation($pub);
}
sub commit {
my ($self) = @_;
$self->fh->print($self->{_buf} . $FOOTER);
}
sub _add_citation {
my ($self, $pub) = @_;
my $host = $self->host;
my $links = $self->links;
my $cite = $pub->{citation}{$self->style} // '';
# replace all html tags in the citation with their rtf equivalent
$cite =~ s/<em>(.*?)<\/em>/\{\\i $1}/g;
$cite =~ s/<i>(.*?)<\/i>/\{\\i $1}/g;
$cite =~ s/&/&/g;
$cite
=~ s/<span style="text-decoration:underline;">(.*?)<\/span>/{\\u $1}/g;
$cite =~ s/<br \/>/\\line /g;
$cite =~ s/“/\\ldblquote/g;
$cite =~ s/”/\\rdblquote /g;
my $indent;
if ($cite
=~ /<div style="text-indent:-25px; padding-left:25px;padding-bottom:0px;">(.*?)<\/div>/
)
{
$indent = 1;
$cite
=~ s/<div style="text-indent:-25px; padding-left:25px;padding-bottom:0px;">(.*?)<\/div>/\\li380 \\fi-380 $1 /g;
}
$cite =~ s/<div>(.*?)<\/div>/$1/g;
my $hyperlink;
if ($cite =~ /<a href\=\"(.*?)\"(\starget\=\"_blank\")*>(.*?)<\/a>/) {
$cite
=~ s/<a href\=\"(.*?)\"(\starget\=\"_blank\")*>(.*?)<\/a>/____/g;
$hyperlink = "{\\field{\\*\\fldinst HYPERLINK $1}{\\fldrslt $3}}";
}
$cite =~ s/ /___/g;
# convert everything that isn't rtf or a space into hex (necessary for dealing with utf8/non-utf8 characters)
# utf8::encode worked, but delivered (the Catmandu typical) double encodings in the file
# utf8::decode got rid of the double encodings but the decoded string was not allowed in the rtf format
# - so: hex, it is!
$cite
=~ s/([^\\\\u|\\\\i|\\\\ldblquote|\\\\rdblquote|\\\\li380|\\\\fi\-380|\\\\line|___|\{|\}|\(|\)|[0-9]|\s])/sprintf("\\'%02x",ord($1))/eg;
# BUT rtf only works with hex codes consisting of 2 characters, everything that's longer gets cut after 2 (see hash above)
# So, replace everything that has a longer hex representation with stuff from the list or nothing at all
while ($cite =~ /\\\'(\d{3,4})/) {
my $hexv = $1;
if ($HEXMAP->{$hexv}) {
$cite =~ s/\\\'$hexv/$HEXMAP->{$hexv}/g;
}
else {
$cite =~ s/\\\'$hexv//g;
}
}
my $citestring = "{\\pard ";
# in case you want the title displayed as link
if ($self->style eq "short") {
my $title = $pub->{title};
$title =~ s/ /___/g if $title;
$title
=~ s/([^\\\\u|\\\\i|\\\\line|___|\{|\}|\(|\)|[0-9]|\s])/sprintf("\\'%02x",ord($1))/eg;
while ($title =~ /\\\'(\d{3,4})/) { # why while??
my $hexv = $1;
if ($HEXMAP->{$hexv}) {
$title =~ s/\\\'$hexv/$HEXMAP->{$hexv}/g;
}
else {
$title =~ s/\\\'$hexv//g;
}
}
$citestring
.= "{\\field{\\*\\fldinst HYPERLINK $host/record/$pub->{_id}}{\\fldrslt "
. $title
. "}}\\line ";
}
$citestring .= $cite;
if ($indent and $links) {
$citestring .= "\\li380 " . $self->_add_links($pub);
}
elsif ($links) {
$citestring .= $self->_add_links($pub);
}
$citestring .= "\\line\\par}\n";
$citestring =~ s/____/$hyperlink/g if $hyperlink;
$citestring =~ s/___/ /g;
$self->{_buf} .= $citestring;
}
sub _add_links {
my ($self, $pub) = @_;
my $host = $self->host;
my $links = $self->links;
my $name = $self->name;
my $line;
$line
= "\\line $name: {\\field{\\*\\fldinst HYPERLINK $host/record/$pub->{_id}}{\\fldrslt $host/record/$pub->{_id}}}";
if ($links && $links == 1) {
if ($pub->{doi}) {
$line
.= "\\line DOI: {\\field{\\*\\fldinst HYPERLINK https://doi.org/$pub->{doi}}{\\fldrslt $pub->{doi}}}";
}
if (my $ext = $pub->{external_id}) {
if ($ext->{isi}->[0]) {
$line
.= "\\line WoS: {\\field{\\*\\fldinst HYPERLINK https://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info:ut/$ext->{isi}->[0]}{\\fldrslt $ext->{isi}->[0]}}";
}
if ($ext->{pmid}->[0]) {
$line .= "\\line PMID: $ext->{pmid}->[0]";
}
if ($ext->{arxiv}->[0]) {
$line
.= "\\line arXiv: {\\field{\\*\\fldinst HYPERLINK http://arxiv.org/abs/$ext->{arxiv}->[0]}{\\fldrslt $ext->{arxiv}->[0]}}";
}
if ($ext->{inspire}->[0]) {
$line
.= "\\line Inspire: {\\field{\\*\\fldinst HYPERLINK http://inspirehep.net/record/$ext->{inspire}->[0]}{\\fldrslt $ext->{inspire}->[0]}}";
}
}
}
$line;
}
1;
=pod
=head1 NAME
Catmandu::Exporter::RTF - a RTF exporter
=head1 SYNOPSIS
use Catmandu::Exporter::RTF;
my $rtf = Catmandu::Exporter::RTF->new(
file => "publications.rtf",
style => "ama",
name => "MyRepo",
);
my $data = {...};
$rtf->add($data);
$rtf->commit;
=head1 DESCRIPTION
This L<Catmandu::Exporter> exports items in RTF by using citation styles.
=head1 CONFIGURATION
=over
=item file
Write output to a local file given by its path or file handle. Alternatively a
scalar reference can be passed to write to a string and a code reference can be
used to write to a callback function.
=item fh
Write the output to an L<IO::Handle>. If not specified,
L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output
handle from the C<file> argument or by using STDOUT.
=item fix
An ARRAY of one or more fixes or file scripts to be applied to exported items.
=item style
The citation style to use. In this case the data should have a key B<citation.$style>.
=item links
0|1. Display external links (e.g. DOI, WoS, PMID, etc)
=item name
Name of the repository to display in case you have set the option B<links to 1>.
=back
=head1 SEE ALSO
L<Catmandu::Exporter>
=cut