276da39af9
Approved by: roberto, delphij Security: VuXML: 0d0f3050-1f69-11e5-9ba9-d050996490d0 Security: http://bugs.ntp.org/show_bug.cgi?id=2853 Security: https://www.kb.cert.org/vuls/id/668167 Security: http://support.ntp.org/bin/view/Main/SecurityNotice#June_2015_NTP_Security_Vulnerabi
543 lines
15 KiB
Perl
543 lines
15 KiB
Perl
=begin comment
|
|
|
|
## Mdoc.pm -- Perl functions for mdoc processing
|
|
##
|
|
## Author: Oliver Kindernay (GSoC project for NTP.org)
|
|
##
|
|
##
|
|
## This file is part of AutoOpts, a companion to AutoGen.
|
|
## AutoOpts is free software.
|
|
## AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
|
|
##
|
|
## AutoOpts is available under any one of two licenses. The license
|
|
## in use must be one of these two and the choice is under the control
|
|
## of the user of the license.
|
|
##
|
|
## The GNU Lesser General Public License, version 3 or later
|
|
## See the files "COPYING.lgplv3" and "COPYING.gplv3"
|
|
##
|
|
## The Modified Berkeley Software Distribution License
|
|
## See the file "COPYING.mbsd"
|
|
##
|
|
## These files have the following sha256 sums:
|
|
##
|
|
## 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
|
|
## 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
|
|
## 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
|
|
=end comment
|
|
=head1 NAME
|
|
|
|
Mdoc - perl module to parse Mdoc macros
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Mdoc qw(ns pp soff son stoggle mapwords);
|
|
|
|
See mdoc2man and mdoc2texi for code examples.
|
|
|
|
=head1 FUNCTIONS
|
|
|
|
=over 4
|
|
|
|
=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
|
|
|
|
Define new macro. The CODE reference will be called by call_macro(). You can
|
|
have two distinct definitions for and inline macro and for a standalone macro
|
|
(i. e. 'Pa' and '.Pa').
|
|
|
|
The CODE reference is passed a list of arguments and is expected to return list
|
|
of strings and control characters (see C<CONSTANTS>).
|
|
|
|
By default the surrouding "" from arguments to macros are removed, use C<raw>
|
|
to disable this.
|
|
|
|
Normaly CODE reference is passed all arguments up to next nested macro. Set
|
|
C<greedy> to to pass everything up to the end of the line.
|
|
|
|
If the concat_until is present, the line is concated until the .Xx macro is
|
|
found. For example the following macro definition
|
|
|
|
def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
|
|
def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
|
|
|
|
and the following input
|
|
|
|
.Oo
|
|
.Cm foo |
|
|
.Cm bar |
|
|
.Oc
|
|
|
|
results in [(foo) | (bar)]
|
|
|
|
=item get_macro( NAME )
|
|
|
|
Returns a hash reference like:
|
|
|
|
{ run => CODE, raw => [1|0], greedy => [1|0] }
|
|
|
|
Where C<CODE> is the CODE reference used to define macro called C<NAME>
|
|
|
|
=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
|
|
|
|
Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a
|
|
list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
|
|
caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
|
|
defined it calls it prior to passing argument to a macro, giving caller a
|
|
chance to alter them. if EOF was reached undef is returned.
|
|
|
|
=item call_macro( MACRO, ARGS, ... )
|
|
|
|
Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
|
|
called and for all the nested macros. Every called macro returns a list which
|
|
is appended to return value and returned when all nested macros are processed.
|
|
Use to_string() to produce a printable string from the list.
|
|
|
|
=item to_string ( LIST )
|
|
|
|
Processes C<LIST> returned from call_macro() and returns formatted string.
|
|
|
|
=item mapwords BLOCK ARRAY
|
|
|
|
This is like perl's map only it calls BLOCK only on elements which are not
|
|
punctuation or control characters.
|
|
|
|
=item space ( ['on'|'off] )
|
|
|
|
Turn spacing on or off. If called without argument it returns the current state.
|
|
|
|
=item gen_encloser ( START, END )
|
|
|
|
Helper function for generating macros that enclose their arguments.
|
|
gen_encloser(qw({ }));
|
|
returns
|
|
sub { '{', ns, @_, ns, pp('}')}
|
|
|
|
=item set_Bl_callback( CODE , DEFS )
|
|
|
|
This module implements the Bl/El macros for you. Using set_Bl_callback you can
|
|
provide a macro definition that should be executed on a .Bl call.
|
|
|
|
=item set_El_callback( CODE , DEFS )
|
|
|
|
This module implements the Bl/El macros for you. Using set_El_callback you can
|
|
provide a macro definition that should be executed on a .El call.
|
|
|
|
=item set_Re_callback( CODE )
|
|
|
|
The C<CODE> is called after a Rs/Re block is done. With a hash reference as a
|
|
parameter, describing the reference.
|
|
|
|
=back
|
|
|
|
=head1 CONSTANTS
|
|
|
|
=over 4
|
|
|
|
=item ns
|
|
|
|
Indicate 'no space' between to members of the list.
|
|
|
|
=item pp ( STRING )
|
|
|
|
The string is 'punctuation point'. It means that every punctuation
|
|
preceeding that element is put behind it.
|
|
|
|
=item soff
|
|
|
|
Turn spacing off.
|
|
|
|
=item son
|
|
|
|
Turn spacing on.
|
|
|
|
=item stoggle
|
|
|
|
Toogle spacing.
|
|
|
|
=item hs
|
|
|
|
Print space no matter spacing mode.
|
|
|
|
=back
|
|
|
|
=head1 TODO
|
|
|
|
* The concat_until only works with standalone macros. This means that
|
|
.Po blah Pc
|
|
will hang until .Pc in encountered.
|
|
|
|
* Provide default macros for Bd/Ed
|
|
|
|
* The reference implementation is uncomplete
|
|
|
|
=cut
|
|
|
|
package Mdoc;
|
|
use strict;
|
|
use warnings;
|
|
use List::Util qw(reduce);
|
|
use Text::ParseWords qw(quotewords);
|
|
use Carp;
|
|
use Exporter qw(import);
|
|
our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
|
|
|
|
use constant {
|
|
ns => ['nospace'],
|
|
soff => ['spaceoff'],
|
|
son => ['spaceon'],
|
|
stoggle => ['spacetoggle'],
|
|
hs => ['hardspace'],
|
|
};
|
|
|
|
sub pp {
|
|
my $c = shift;
|
|
return ['pp', $c ];
|
|
}
|
|
sub gen_encloser {
|
|
my ($o, $c) = @_;
|
|
return sub { ($o, ns, @_, ns, pp($c)) };
|
|
}
|
|
|
|
sub mapwords(&@) {
|
|
my ($f, @l) = @_;
|
|
my @res;
|
|
for my $el (@l) {
|
|
local $_ = $el;
|
|
push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ?
|
|
$el : $f->();
|
|
}
|
|
return @res;
|
|
}
|
|
|
|
my %macros;
|
|
|
|
###############################################################################
|
|
|
|
# Default macro definitions start
|
|
|
|
###############################################################################
|
|
|
|
def_macro('Xo', sub { @_ }, concat_until => '.Xc');
|
|
|
|
def_macro('.Ns', sub {ns, @_});
|
|
def_macro('Ns', sub {ns, @_});
|
|
|
|
{
|
|
my %reference;
|
|
def_macro('.Rs', sub { () } );
|
|
def_macro('.%A', sub {
|
|
if ($reference{authors}) {
|
|
$reference{authors} .= " and @_"
|
|
}
|
|
else {
|
|
$reference{authors} = "@_";
|
|
}
|
|
return ();
|
|
});
|
|
def_macro('.%T', sub { $reference{title} = "@_"; () } );
|
|
def_macro('.%O', sub { $reference{optional} = "@_"; () } );
|
|
|
|
sub set_Re_callback {
|
|
my ($sub) = @_;
|
|
croak 'Not a CODE reference' if not ref $sub eq 'CODE';
|
|
def_macro('.Re', sub {
|
|
my @ret = $sub->(\%reference);
|
|
%reference = (); @ret
|
|
});
|
|
return;
|
|
}
|
|
}
|
|
|
|
def_macro('.Bl', sub { die '.Bl - no list callback set' });
|
|
def_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
|
|
def_macro('.El', sub { die '.El requires .Bl first' });
|
|
|
|
|
|
{
|
|
my $elcb = sub { () };
|
|
|
|
sub set_El_callback {
|
|
my ($sub) = @_;
|
|
croak 'Not a CODE reference' if ref $sub ne 'CODE';
|
|
$elcb = $sub;
|
|
return;
|
|
}
|
|
|
|
sub set_Bl_callback {
|
|
my ($blcb, %defs) = @_;
|
|
croak 'Not a CODE reference' if ref $blcb ne 'CODE';
|
|
def_macro('.Bl', sub {
|
|
|
|
my $orig_it = get_macro('.It');
|
|
my $orig_el = get_macro('.El');
|
|
my $orig_bl = get_macro('.Bl');
|
|
my $orig_elcb = $elcb;
|
|
|
|
# Restore previous .It and .El on each .El
|
|
def_macro('.El', sub {
|
|
def_macro('.El', delete $orig_el->{run}, %$orig_el);
|
|
def_macro('.It', delete $orig_it->{run}, %$orig_it);
|
|
def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
|
|
my @ret = $elcb->(@_);
|
|
$elcb = $orig_elcb;
|
|
@ret
|
|
});
|
|
$blcb->(@_)
|
|
}, %defs);
|
|
return;
|
|
}
|
|
}
|
|
|
|
def_macro('.Sm', sub {
|
|
my ($arg) = @_;
|
|
if (defined $arg) {
|
|
space($arg);
|
|
} else {
|
|
space() eq 'off' ?
|
|
space('on') :
|
|
space('off');
|
|
}
|
|
()
|
|
} );
|
|
def_macro('Sm', do { my $off; sub {
|
|
my ($arg) = @_;
|
|
if (defined $arg && $arg =~ /^(on|off)$/) {
|
|
shift;
|
|
if ($arg eq 'off') { soff, @_; }
|
|
elsif ($arg eq 'on') { son, @_; }
|
|
}
|
|
else {
|
|
stoggle, @_;
|
|
}
|
|
}} );
|
|
|
|
###############################################################################
|
|
|
|
# Default macro definitions end
|
|
|
|
###############################################################################
|
|
|
|
sub def_macro {
|
|
croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
|
|
my ($macro, $sub, %def) = @_;
|
|
croak 'Not a CODE reference' if ref $sub ne 'CODE';
|
|
|
|
$macros{ $macro } = {
|
|
run => $sub,
|
|
greedy => delete $def{greedy} || 0,
|
|
raw => delete $def{raw} || 0,
|
|
concat_until => delete $def{concat_until},
|
|
};
|
|
if ($macros{ $macro }{concat_until}) {
|
|
$macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
|
|
$macros{ $macro }{greedy} = 1;
|
|
}
|
|
return;
|
|
}
|
|
|
|
sub get_macro {
|
|
my ($macro) = @_;
|
|
croak "Macro <$macro> not defined" if not exists $macros{ $macro };
|
|
+{ %{ $macros{ $macro } } }
|
|
}
|
|
|
|
#TODO: document this
|
|
sub parse_opts {
|
|
my %args;
|
|
my $last;
|
|
for (@_) {
|
|
if ($_ =~ /^\\?-/) {
|
|
s/^\\?-//;
|
|
$args{$_} = 1;
|
|
$last = _unquote($_);
|
|
}
|
|
else {
|
|
$args{$last} = _unquote($_) if $last;
|
|
undef $last;
|
|
}
|
|
}
|
|
return %args;
|
|
}
|
|
|
|
sub _is_control {
|
|
my ($el, $expected) = @_;
|
|
if (defined $expected) {
|
|
ref $el eq 'ARRAY' and $el->[0] eq $expected;
|
|
}
|
|
else {
|
|
ref $el eq 'ARRAY';
|
|
}
|
|
}
|
|
|
|
{
|
|
my $sep = ' ';
|
|
|
|
sub to_string {
|
|
if (@_ > 0) {
|
|
# Handle punctunation
|
|
my ($in_brace, @punct) = '';
|
|
my @new = map {
|
|
if (/^([\[\(])$/) {
|
|
($in_brace = $1) =~ tr/([/)]/;
|
|
$_, ns
|
|
}
|
|
elsif (/^([\)\]])$/ && $in_brace eq $1) {
|
|
$in_brace = '';
|
|
ns, $_
|
|
}
|
|
elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
|
|
push @punct, ns, $_;
|
|
();
|
|
}
|
|
elsif (_is_control($_, 'pp')) {
|
|
$_->[1]
|
|
}
|
|
elsif (_is_control($_)) {
|
|
$_
|
|
}
|
|
else {
|
|
splice (@punct), $_;
|
|
}
|
|
} @_;
|
|
push @new, @punct;
|
|
|
|
# Produce string out of an array dealing with the special control characters
|
|
# space('off') must but one character delayed
|
|
my ($no_space, $space_off) = 1;
|
|
my $res = '';
|
|
while (defined(my $el = shift @new)) {
|
|
if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' }
|
|
elsif (_is_control($el, 'nospace')) { $no_space = 1; }
|
|
elsif (_is_control($el, 'spaceoff')) { $space_off = 1; }
|
|
elsif (_is_control($el, 'spaceon')) { space('on'); }
|
|
elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ?
|
|
$space_off = 1 :
|
|
space('on') }
|
|
else {
|
|
if ($no_space) {
|
|
$no_space = 0;
|
|
$res .= "$el"
|
|
}
|
|
else {
|
|
$res .= "$sep$el"
|
|
}
|
|
|
|
if ($space_off) { space('off'); $space_off = 0; }
|
|
}
|
|
}
|
|
$res
|
|
}
|
|
else {
|
|
'';
|
|
}
|
|
}
|
|
|
|
sub space {
|
|
my ($arg) = @_;
|
|
if (defined $arg && $arg =~ /^(on|off)$/) {
|
|
$sep = ' ' if $arg eq 'on';
|
|
$sep = '' if $arg eq 'off';
|
|
return;
|
|
}
|
|
else {
|
|
return $sep eq '' ? 'off' : 'on';
|
|
}
|
|
}
|
|
}
|
|
|
|
sub _unquote {
|
|
my @args = @_;
|
|
$_ =~ s/^"([^"]+)"$/$1/g for @args;
|
|
wantarray ? @args : $args[0];
|
|
}
|
|
|
|
sub call_macro {
|
|
my ($macro, @args) = @_;
|
|
my @ret;
|
|
|
|
my @newargs;
|
|
my $i = 0;
|
|
|
|
@args = _unquote(@args) if (!$macros{ $macro }{raw});
|
|
|
|
# Call any callable macros in the argument list
|
|
for (@args) {
|
|
if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
|
|
push @ret, call_macro($_, @args[$i+1 .. $#args]);
|
|
last;
|
|
} else {
|
|
if ($macros{ $macro }{greedy}) {
|
|
push @ret, $_;
|
|
}
|
|
else {
|
|
push @newargs, $_;
|
|
}
|
|
}
|
|
$i++;
|
|
}
|
|
|
|
if ($macros{ $macro }{concat_until}) {
|
|
my ($n_macro, @n_args) = ('');
|
|
while (1) {
|
|
die "EOF was reached and no $macros{ $macro }{concat_until} found"
|
|
if not defined $n_macro;
|
|
($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
|
|
if ($n_macro eq $macros{ $macro }{concat_until}) {
|
|
push @ret, call_macro($n_macro, @n_args);
|
|
last;
|
|
}
|
|
else {
|
|
$n_macro =~ s/^\.//;
|
|
push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($macros{ $macro }{greedy}) {
|
|
#print "MACROG $macro (", (join ', ', @ret), ")\n";
|
|
return $macros{ $macro }{run}->(@ret);
|
|
}
|
|
else {
|
|
#print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
|
|
return $macros{ $macro }{run}->(@newargs), @ret;
|
|
}
|
|
}
|
|
|
|
{
|
|
my ($in_fh, $out_sub, $preprocess_sub);
|
|
sub parse_line {
|
|
$in_fh = $_[0] if defined $_[0] || !defined $in_fh;
|
|
$out_sub = $_[1] if defined $_[1] || !defined $out_sub;
|
|
$preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
|
|
|
|
croak 'out_sub not a CODE reference'
|
|
if not ref $out_sub eq 'CODE';
|
|
croak 'preprocess_sub not a CODE reference'
|
|
if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
|
|
|
|
while (my $line = <$in_fh>) {
|
|
chomp $line;
|
|
if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ ||
|
|
$line =~ /^\.\\"/)
|
|
{
|
|
$line =~ s/ +/ /g;
|
|
my ($macro, @args) = quotewords(' ', 1, $line);
|
|
@args = grep { defined $_ } @args;
|
|
$preprocess_sub->(@args) if defined $preprocess_sub;
|
|
if ($macro && exists $macros{ $macro }) {
|
|
return ($macro, @args);
|
|
} else {
|
|
$out_sub->($line);
|
|
}
|
|
}
|
|
else {
|
|
$out_sub->($line);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
1;
|
|
__END__
|