248 lines
4.5 KiB
Perl
248 lines
4.5 KiB
Perl
|
#! /usr/bin/perl
|
||
|
|
||
|
# This is a skeleton of a perl script for processing the output of
|
||
|
# sgmls. You must change the parts marked with "XXX".
|
||
|
|
||
|
# XXX This is for troff: in data, turn \ into \e (which prints as \).
|
||
|
# Backslashes in SDATA entities are left as backslashes.
|
||
|
|
||
|
$backslash_in_data = "\\e";
|
||
|
|
||
|
$prog = $0;
|
||
|
|
||
|
$prog =~ s|.*/||;
|
||
|
|
||
|
$level = 0;
|
||
|
|
||
|
while (<STDIN>) {
|
||
|
chop;
|
||
|
$command = substr($_, 0, 1);
|
||
|
substr($_, 0, 1) = "";
|
||
|
if ($command eq '(') {
|
||
|
&start_element($_);
|
||
|
$level++;
|
||
|
}
|
||
|
elsif ($command eq ')') {
|
||
|
$level--;
|
||
|
&end_element($_);
|
||
|
foreach $key (keys %attribute_value) {
|
||
|
@splitkey = split($;, $key);
|
||
|
if ($splitkey[0] == $level) {
|
||
|
delete $attribute_value{$key};
|
||
|
delete $attribute_type{$key};
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
elsif ($command eq '-') {
|
||
|
&unescape_data($_);
|
||
|
&data($_);
|
||
|
}
|
||
|
elsif ($command eq 'A') {
|
||
|
@field = split(/ /, $_, 3);
|
||
|
$attribute_type{$level,$field[0]} = $field[1];
|
||
|
&unescape_data($field[2]);
|
||
|
$attribute_value{$level,$field[0]} = $field[2];
|
||
|
}
|
||
|
elsif ($command eq '&') {
|
||
|
&entity($_);
|
||
|
}
|
||
|
elsif ($command eq 'D') {
|
||
|
@field = split(/ /, $_, 4);
|
||
|
$data_attribute_type{$field[0], $field[1]} = $field[2];
|
||
|
&unescape_data($field[3]);
|
||
|
$data_attribute_value{$field[0], $field[1]} = $field[3];
|
||
|
}
|
||
|
elsif ($command eq 'N') {
|
||
|
$notation{$_} = 1;
|
||
|
if (defined($sysid)) {
|
||
|
$notation_sysid{$_} = $sysid;
|
||
|
undef($sysid);
|
||
|
}
|
||
|
if (defined($pubid)) {
|
||
|
$notation_pubid{$_} = $pubid;
|
||
|
undef($pubid);
|
||
|
}
|
||
|
}
|
||
|
elsif ($command eq 'I') {
|
||
|
@field = split(/ /, $_, 3);
|
||
|
$entity_type{$field[0]} = $field[1];
|
||
|
&unescape($field[2]);
|
||
|
# You may want to substitute \e for \ if the type is CDATA.
|
||
|
$entity_text{$field[0]} = $field[2];
|
||
|
$entity_code{$field[0]} = 'I';
|
||
|
}
|
||
|
elsif ($command eq 'E') {
|
||
|
@field = split(/ /, $_);
|
||
|
$entity_code{$field[0]} = 'E';
|
||
|
$entity_type{$field[0]} = $field[1];
|
||
|
$entity_notation{$field[0]} = $field[2];
|
||
|
if (defined(@files)) {
|
||
|
foreach $i (0..$#files) {
|
||
|
$entity_filename{$field[0], $i} = $files[i];
|
||
|
}
|
||
|
undef(@files);
|
||
|
}
|
||
|
if (defined($sysid)) {
|
||
|
$entity_sysid{$field[0]} = $sysid;
|
||
|
undef($sysid);
|
||
|
}
|
||
|
if (defined($pubid)) {
|
||
|
$entity_pubid{$field[0]} = $pubid;
|
||
|
undef($pubid);
|
||
|
}
|
||
|
}
|
||
|
elsif ($command eq 'S') {
|
||
|
$entity_code{$_} = 'S';
|
||
|
if (defined(@files)) {
|
||
|
foreach $i (0..$#files) {
|
||
|
$entity_filename{$_, $i} = $files[i];
|
||
|
}
|
||
|
undef(@files);
|
||
|
}
|
||
|
if (defined($sysid)) {
|
||
|
$entity_sysid{$_} = $sysid;
|
||
|
undef($sysid);
|
||
|
}
|
||
|
if (defined($pubid)) {
|
||
|
$entity_pubid{$_} = $pubid;
|
||
|
undef($pubid);
|
||
|
}
|
||
|
}
|
||
|
elsif ($command eq '?') {
|
||
|
&unescape($_);
|
||
|
&pi($_);
|
||
|
}
|
||
|
elsif ($command eq 'L') {
|
||
|
@field = split(/ /, $_);
|
||
|
$lineno = $field[0];
|
||
|
if ($#field >= 1) {
|
||
|
&unescape($field[1]);
|
||
|
$filename = $field[1];
|
||
|
}
|
||
|
}
|
||
|
elsif ($command eq 'V') {
|
||
|
@field = split(/ /, $_, 2);
|
||
|
&unescape($field[1]);
|
||
|
$environment{$field[0]} = $field[1];
|
||
|
}
|
||
|
elsif ($command eq '{') {
|
||
|
&start_subdoc($_);
|
||
|
}
|
||
|
elsif ($command eq '}') {
|
||
|
&end_subdoc($_);
|
||
|
}
|
||
|
elsif ($command eq 'f') {
|
||
|
&unescape($_);
|
||
|
push(@files, $_);
|
||
|
}
|
||
|
elsif ($command eq 'p') {
|
||
|
&unescape($_);
|
||
|
$pubid = $_;
|
||
|
}
|
||
|
elsif ($command eq 's') {
|
||
|
&unescape($_);
|
||
|
$sysid = $_;
|
||
|
}
|
||
|
elsif ($command eq 'C') {
|
||
|
$conforming = 1;
|
||
|
}
|
||
|
else {
|
||
|
warn "$prog:$ARGV:$.: unrecognized command \`$command'\n";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sub unescape {
|
||
|
$_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc($1)/eg;
|
||
|
}
|
||
|
|
||
|
sub esc {
|
||
|
local($_) = $_[0];
|
||
|
if ($_ eq '012' || $_ eq '12') {
|
||
|
""; # ignore RS
|
||
|
}
|
||
|
elsif (/^[0-7]/) {
|
||
|
sprintf("%c", oct);
|
||
|
}
|
||
|
elsif ($_ eq 'n') {
|
||
|
"\n";
|
||
|
}
|
||
|
elsif ($_ eq '|') {
|
||
|
"";
|
||
|
}
|
||
|
elsif ($_ eq "\\") {
|
||
|
"\\";
|
||
|
}
|
||
|
else {
|
||
|
$_;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sub unescape_data {
|
||
|
local($sdata) = 0;
|
||
|
$_[0] =~ s/\\([0-7][0-7]?[0-7]?|.)/&esc_data($1)/eg;
|
||
|
}
|
||
|
|
||
|
sub esc_data {
|
||
|
local($_) = $_[0];
|
||
|
if ($_ eq '012' || $_ eq '12') {
|
||
|
""; # ignore RS
|
||
|
}
|
||
|
elsif (/^[0-7]/) {
|
||
|
sprintf("%c", oct);
|
||
|
}
|
||
|
elsif ($_ eq 'n') {
|
||
|
"\n";
|
||
|
}
|
||
|
elsif ($_ eq '|') {
|
||
|
$sdata = !$sdata;
|
||
|
"";
|
||
|
}
|
||
|
elsif ($_ eq "\\") {
|
||
|
$sdata ? "\\" : $backslash_in_data;
|
||
|
}
|
||
|
else {
|
||
|
$_;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
sub start_element {
|
||
|
local($gi) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|
||
|
sub end_element {
|
||
|
local($gi) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|
||
|
sub data {
|
||
|
local($data) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|
||
|
# A processing instruction.
|
||
|
|
||
|
sub pi {
|
||
|
local($data) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|
||
|
# A reference to an external entity.
|
||
|
|
||
|
sub entity {
|
||
|
local($name) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|
||
|
sub start_subdoc {
|
||
|
local($name) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|
||
|
sub end_subdoc {
|
||
|
local($name) = $_[0];
|
||
|
# XXX
|
||
|
}
|
||
|
|