From 8c1a71988deac1f2fccbd9130d394043155fd520 Mon Sep 17 00:00:00 2001 From: jfieber Date: Tue, 9 May 1995 23:58:06 +0000 Subject: [PATCH] The program that turns sgml files (tagged according to the linuxdoc DTD) into HTML, LaTeX or ascii. (the latter is still pretty rough) Reviewed by: rgrimes --- usr.bin/sgmlfmt/Makefile | 9 + usr.bin/sgmlfmt/sgmlfmt.1 | 105 +++++++ usr.bin/sgmlfmt/sgmlfmt.pl | 570 +++++++++++++++++++++++++++++++++++++ 3 files changed, 684 insertions(+) create mode 100644 usr.bin/sgmlfmt/Makefile create mode 100644 usr.bin/sgmlfmt/sgmlfmt.1 create mode 100755 usr.bin/sgmlfmt/sgmlfmt.pl diff --git a/usr.bin/sgmlfmt/Makefile b/usr.bin/sgmlfmt/Makefile new file mode 100644 index 000000000000..835c05f84569 --- /dev/null +++ b/usr.bin/sgmlfmt/Makefile @@ -0,0 +1,9 @@ +# $Id:$ + +MAN1= sgmlfmt.1 + +afterinstall: + install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ + ${.CURDIR}/sgmlfmt.pl ${DESTDIR}${BINDIR}/sgmlfmt + +.include diff --git a/usr.bin/sgmlfmt/sgmlfmt.1 b/usr.bin/sgmlfmt/sgmlfmt.1 new file mode 100644 index 000000000000..d1baf14352a7 --- /dev/null +++ b/usr.bin/sgmlfmt/sgmlfmt.1 @@ -0,0 +1,105 @@ +.Dd May 6, 1995 +.Os FreeBSD 2.0.5 +.Dt SGMLFMT 1 +.Sh NAME +.Nm sgmlfmt +.Nd Formats SGML files tagged according to the linuxdoc DTD. +.Sh SYNOPSIS +.Nm +.Fl Ar format Op Fl Ar format... +.Ar +.Sh DESCRIPTION +The +.Nm +command reads SGML files tagged according to the linuxdoc DTD, +validates them using the +.Xr sgmls 1 +parser and then converts them to the specified output format. +The input file must include the following document type +declaration before any uncommented text: +.Bd -literal -offset indent + +.Ed +.Pp +The +.Fl Ar format +options for output include the following: +.Bl -tag -width Ds +.It Fl html +Generates a set of linked HTML files suitable for use with an +HTML browser. A top level file, +.Pa file.html , +contains the title, author, date, abstract and brief table of +contents for the document. A file +.Pa file_toc.html +contains a complete table of contents. A series of files named +.Pa file-1.html , +.Pa file-2.html ... +.Pa file-n.html +contain the actual text of the document. +.It Fl latex +Generates a single output file with the extension +.Pa .tex +suitable for processing with LaTeX. Note that the LaTeX style +file +.Pa /usr/share/sgml/FreeBSD/lib/linuxdoc.sty +must be accessible to LaTeX for correct processing. +.It Fl ascii +Generates a single output file with the extension +.Pa .txt +suitable for viewing on an ASCII terminal. +.It Fl nroff +Generates a single output file with the extension +.Pa .nroff +suitable processing with +.Xr nroff 1 +or +.Xr groff 1 . +This is actually an intermediate conversion used by the +.Fl ascii +format option. +.El +.Pp +If the input file name ends with +.Pa .sgml , +the extension may be omitted on the command line. +In all cases, the output files are created in the current working +directory. +.Sh FILES +.Pa /usr/share/sgml/FreeBSD/dtd/linuxdoc +- the linuxdoc DTD. +.Pp +.Pa /usr/share/sgml/FreeBSD/rep/ +- directory containing replacement files for +.Xr sgmlsasp 1 . +.Pp +.Pa /usr/share/sgml/FreeBSD/lib/linuxdoc.sty +- the LaTeX style used in documents produced with the +.Fl latex +format option. +.Sh SEE ALSO +.Xr sgmls 1 , +.Xr sgmlsasp 1 , +.Xr groff 1 +.Sh HISTORY +The +.Nm +command appeared in Version 2.0.5 FreeBSD UNIX. +.Sh AUTHORS +The +.Nm +command was written by John Fieber +.Aq jfieber@FreeBSD.org . +The linuxdoc DTD was written by Matt Welsh +.Aq mdw@cs.cornell.edu +and based on the Qwertz DTD written by Tom Gordon +.Aq thomas.gordon@gmd.de . +.Sh BUGS +A line in the SGML source file beginning with a period (.) will +confuse +.Xr groff 1 +which is used to generate ASCII output. In general, the ASCII +output leaves much to be desired. +.Pp +The divison of the sources file into separate HTML files is +currently fixed. diff --git a/usr.bin/sgmlfmt/sgmlfmt.pl b/usr.bin/sgmlfmt/sgmlfmt.pl new file mode 100755 index 000000000000..959b1f0617b8 --- /dev/null +++ b/usr.bin/sgmlfmt/sgmlfmt.pl @@ -0,0 +1,570 @@ +#!/usr/bin/perl +# $Id:$ + +# Format an sgml document tagged according to the linuxdoc DTD. +# by John Fieber for the FreeBSD documentation +# project. +# +# Usage: sgmlformat -format [-format ...] inputfile [inputfile ...] +# +# -format outputfile format +# ------------------------------------------------------------- +# -html inputfile.html HTML +# -txt | -ascii inputfile.txt ascii text +# -tex | -latex inputfile.tex LaTeX +# -nroff inputfile.nroff groff for ms macros +# -ps inputfile.txt postscript +# +# Bugs: +# +# Text lines that start with a period (.) confuse the conversions that +# use groff. The workaround is to make sure the SGML source doesn't +# have any periods at the beginning of a line. + +####################################################################### + +# Look in a couple places for the SGML DTD and replacement files +# + +if (-d "$ENV{'HOME'}/lib/sgml/FreeBSD") { + $sgmldir = "$ENV{'HOME'}/lib/sgml"; +} +elsif (-d "$ENV{'HOME'}/sgml/FreeBSD") { + $sgmldir = "$ENV{'HOME'}/sgml"; +} +elsif (-d "/usr/share/sgml/FreeBSD" ) { + $sgmldir = "/usr/share/sgml"; +} +else { + die "Cannot locate sgml files!\n"; +} + +# +# Locate the DTD, an SGML declaration, and the replacement files +# + +$dtdbase = "$sgmldir/FreeBSD"; +$dtd = "$dtdbase/dtd/linuxdoc"; +if (-f "$dtd.dec") { + $decl = "$dtd.dec"; +} +else { + $decl = ""; +} +$replbase = "$dtdbase/rep"; + +if (! $ENV{"SGML_PATH"}) { + $ENV{"SGML_PATH"} = "$sgmldir/%O/%C/%T"; +} + +# +# Look for the file specified on the command line +# + +sub getfile { + local($filearg) = @_; + if (-f "$filearg.sgml") { + $file = "$filearg.sgml"; + } + elsif (-f $filearg) { + $file = $filearg; + } + else { + return 0; + } + $fileroot = $file; + $fileroot =~ s/.*\///; # drop the path + $fileroot =~ s/\.sgml$//; # drop the .sgml + $filepath = $file; + $filepath =~ s/\/*[^\/]*$//; + if ($filepath eq "") { + $ENV{"SGML_PATH"} .= ":."; + } + else { + $ENV{"SGML_PATH"} .= ":$filepath/%S:."; + } + return 1; +} + +# +# A function to run sgmls and sgmlsasp on the input file. +# +# Arguments: +# 1. A file handle for the output +# 2. A replacement file (directory actually) +# + +sub sgmlparse { + local($fhandle, $replacement) = @_; + $ENV{'SGML_PATH'} = "$replbase/$replacement.%N:$ENV{'SGML_PATH'}"; + open($fhandle, "sgmls $decl $file | sgmlsasp $replbase/$replacement.mapping |"); +} + +# +# Generate nroff output +# + +sub gen_nroff { + open (outfile, ">$fileroot.nroff"); + &sgmlparse(infile, "nroff"); + $\ = "\n"; # automatically add newline on print + while () { + chop; + # This is supposed to ensure that no text line starts + # with a dot (.), thus confusing groff, but it doesn't + # appear to work. + unless (/^\.DS/.../^\.DE/) { + s/^[ \t]{1,}(.*)/$1/g; + } + s/^\.[ \t].*/\\\&$&/g; + print outfile; + } + $\ = ""; + close(infile); + close(outfile); +} + +# +# Generate ASCII output using groff +# + +sub gen_ascii { + &sgmlparse(infile, "nroff"); + open(outfile, "| groff -T ascii -t -ms | col -b > $fileroot.txt"); + while () { + print outfile; + } + close(infile); + close(outfile); +} + +# +# Generate Postscript output using groff (this is suboptimal +# for printed output!) +# + +sub gen_ps { + &sgmlparse(infile, "grops"); + open(outfile, "| groff -T ps -t -ms > $fileroot.ps"); + while () { + print outfile; + } + close(infile); + close(outfile); +} + +# +# Generate LaTeX output +# + +sub gen_latex { + open(outfile, ">$fileroot.tex"); + &sgmlparse(infile, "latex"); + while () { + print outfile; + } + close(infile); + close(outfile); +} + + +# +# Generate HTML output. +# +# HTML is generated in two passes. +# +# The first pass takes the output from sgmlsasp and gathers information +# about the structure of the document that is used in the sceond pass +# for splitting the file into separate files. Targets for cross +# references are also stored in this pass. +# +# Based on the information from the first pass, the second pass +# generates a numbered series of HTML files, a "toplevel" file +# containing the title, author, abstract and a brief table of +# contents. A detailed table of contents is also generated. The +# second pass generates links for cross references and URLs. + +# +# Tunable parameters +# +$maxlevel = 3; # max section depth +$num_depth = 4; # depth of numbering +$m_depth = 2; # depth of menus + + +$sc = 0; # number of sections +$filecount = 0; # number of files + +sub gen_html { + local($i, $sl); + $tmpfile = "/tmp/sgmlf.$$"; + + open(bar, ">$tmpfile"); +# print STDERR "(Pass 1..."; + &sgmlparse(foo, "html"); + while () { + print bar; + # count up the number of files to be generated + if (/^<@@sect>/) { + $sl++; + $sc++; + $st_sl[$sc] = $sl; + + # Per level counters + $counter[$sl]++; + $counter[$sl + 1] = 0; + + # calculate the section number in the form x.y.z. + $st_num[$sc] = ""; + if ($sl <= $num_depth) { + for ($i = 1; $i <= $sl; $i++) { + $st_num[$sc] .= "$counter[$i]."; + } + $st_num[$sc] .= " "; + } + + # calculate the file number and output level + if ($sl <= $maxlevel) { + $filecount++; + $st_ol[$sc] = $sl; + } + else { + $st_ol[$sc] = $maxlevel; + } + + $st_file[$sc] = $filecount; + + # Calculate the highest level node in which this + # node should appear as a menu item. + $st_pl[$sc] = $sl - $m_depth; + if ($st_pl[$sc] < 0) { + $st_pl[$sc] = 0; + } + if ($st_pl[$sc] > $maxlevel) { + $st_pl[$sc] = $maxlevel; + } + } + if (/^<@@endsect>/) { + $sl--; + } + + # record the section number that a label occurs in + if (/^<@@label>/) { + chop; + s/^<@@label>//; + if ($references{$_} eq "") { + $references{$_} = "$filecount"; + } + else { + print STDERR "Warning: the label `$_' is multiply-defined.\n"; + } + } + } + close(bar); + +# print STDERR " Pass 2..."; + open(foofile, $tmpfile); + &html2html(foofile, "boo"); +# print STDERR ")\n"; + + unlink($tmpfile); +} + +# +# HTML conversion, pass number 2 +# + +sub html2html { + local($infile, $outfile) = @_; + local($i); + + $sc = 0; + push(@scs, $sc); + + open(tocfile, ">${fileroot}_toc.html"); + print tocfile "\n"; + + while (<$infile>) { + # change `<' and `>' to `<' and `>' in

+	if (/
/.../<\/pre>/) {
+	    s//<\1pre>/g;
+	    s/>/\>/g;
+	    s/<([\/]*)pre\>/<\1pre>/g;
+	}
+
+      tagsw: {
+	  # titles and headings
+	  if (s/^<@@title>//) {
+	      chop;
+	      print tocfile "\n$_\n\n";
+	      print tocfile "

$_

\n"; + $header[$st_ol[$sc]] = + "\n\n$_\n" . + "\n\n

$_

\n"; + $footer[$st_ol[$sc]] = "\n\n"; + last tagsw; + } + + # + # HEADER begin + # + if (s/^<@@head>//) { + chop; + + if ($part == 1) { + $text[0] .= "

Part $partnum:
$_"; + last tagsw; + } + + $href = "\"$fileroot-$st_file[$sc].html#$sc\""; + + # set up headers and footers + if ($st_sl[$sc] > 0 && $st_sl[$sc] <= $maxlevel) { + $header[$st_ol[$sc]] = + "\n\n$_\n\n" . + "\n$navbar[$st_ol[$sc]]\n
\n"; + $footer[$st_ol[$sc]] = + "
\n$navbar[$st_ol[$sc]]\n\n"; + } + + # Add this to the master table of contents + print tocfile "
$st_num[$sc]" . + "$_"; + + # Calculate the level to use in the HTML file + $hlevel = $st_sl[$sc] - $st_ol[$sc] + 2; + $shlevel = $st_sl[$sc] - $st_ol[$sc] + 3; + + $i = $st_ol[$sc]; + + # Add the section header + $text[$i] .= "$st_num[$sc]$_"; + $i--; + + # And also to the parent + if ($st_sl[$sc] == $st_ol[$sc] && $i >= 0) { + $text[$i] .= "$st_num[$sc]" . + "$_"; + $i--; + } + + # and to the grandparents + for (; $i >= $st_pl[$sc]; $i--) { + $text[$i] .= "
$st_num[$sc] " . + "$_"; + } + + last tagsw; + } + + # + # HEADER end + # + if (s/^<@@endhead>//) { + if ($part == 1) { + $text[0] .= "

\n"; + $part = 0; + last tagsw; + } + print tocfile "\n"; + + $i = $st_ol[$sc]; + + # Close the section header + $text[$i] .= "\n"; + $i--; + + # in the parent... + if ($st_sl[$sc] == $st_ol[$sc] && $i >= 0) { + $text[$i] .= "\n"; + $i--; + } + + # in the grandparent... + for (; $i >= $st_pl[$sc]; $i--) { + $text[$i] .= "\n"; + } + last tagsw; + } + + # sectioning + if (s/^<@@part>//) { + $part = 1; + $partnum++; + # not yet implemented in the DTD + last tagsw; + } + + # + # BEGINNING of a section + # + if (s/^<@@sect>//) { + # Increment the section counter and save it on a stack + # for future reference. + $sc++; + push(@scs, $sc); + + # Set up the navigation bar + if ($st_file[$sc] > $st_file[$sc - 1]) { + &navbar($st_file[$sc], $filecount, $sc); + } + + # Prepare for menu entries in the table of contents and + # parent file(s). + if ($st_sl[$sc - 1] < $st_sl[$sc]) { + print tocfile "
\n"; + $i = $st_ol[$sc] - 1 - ($st_sl[$sc] == $st_ol[$sc]); + for (; $i >= $st_pl[$sc]; $i--) { + $text[$i] .= "
\n"; + } + } + last tagsw; + } + + # + # END of a section + # + if (s/^<@@endsect>//) { + + # Remember the section number! Subsections may have + # altered the global $sc variable. + local ($lsc) = pop(@scs); + + # Close off subsection menus we may have created in + # parent file(s). + if ($st_sl[$lsc] > $st_sl[$sc + 1]) { + print tocfile "
\n"; + $i = $st_ol[$lsc] - 1 - ($st_sl[$lsc] == $st_ol[$lsc]); + for (; $i >= $st_pl[$lsc]; $i--) { + $text[$i] .= "
\n"; + } + } + + # If this section is below $maxlevel, write it now. + if ($st_sl[$lsc] <= $maxlevel) { + open(SECOUT, ">${fileroot}-$st_file[$lsc].html"); + print SECOUT "$header[$st_ol[$lsc]] $text[$st_ol[$lsc]] " . + "$footer[$st_ol[$lsc]]"; + $text[$st_ol[$lsc]] = ""; + close(SECOUT); + } + last tagsw; + } + + # cross references + if (s/^<@@label>//) { + chop; + $text[$st_ol[$sc]] .= ""; + last tagsw; + } + if (s/^<@@ref>//) { + chop; + $refname = $_; + $text[$st_ol[$sc]] .= + ""; + last tagsw; + } + if (s/^<@@endref>//) { +# $text[$st_ol[$sc]] .= ""; + last tagsw; + } + if (s/^<@@refnam>//) { + $text[$st_ol[$sc]] .= "$refname"; + last tagsw; + } + # URLs + if (s/^<@@url>//) { + chop; + $urlname = $_; + $text[$st_ol[$sc]] .= ""; + last tagsw; + } + if (s/^<@@urlnam>//) { + $text[$st_ol[$sc]] .= "$urlname"; + last tagsw; + } + if (s/^<@@endurl>//) { +# $text[$st_ol[$sc]] .= ""; + last tagsw; + } + + + # If nothing else did anything with this line, just print it. + $text[$st_ol[$sc]] .= "$_"; + } + } + + print tocfile ""; + open(SECOUT, ">$fileroot.html"); + print SECOUT "$header[0] $text[0] $footer[0]"; + close(SECOUT); + close tocfile; +} + +sub navbar { + local ($fnum, $fmax, $sc) = @_; + + $prevf = $fnum - 1; + $nextf = $fnum + 1; + + $navbar[$st_ol[$sc]] = "\n"; + $navbar[$st_ol[$sc]] .= + "Table of Contents\n"; + if ($prevf <= 0) { + $navbar[$st_ol[$sc]] .= + "| Previous\n"; + } + else { + $navbar[$st_ol[$sc]] .= + "| Previous\n"; + } + if ($nextf <= $fmax) { + $navbar[$st_ol[$sc]] .= + "| Next\n"; + } + else { + $navbar[$st_ol[$sc]] .= + "| Next\n"; + } + $navbar[$st_ol[$sc]] .= "\n"; +} + + + + + +# Now, read the command line and take appropriate action + +$fcount = 0; +for (@ARGV) { + if (/^-.*/) { + s/^-//; + $gen{$_} = 1; + } + else { + @infiles[$fcount] = $_; + $fcount++; + } +} + +for ($i = 0; $i < $fcount; $i++) { + if (&getfile($infiles[$i])) { + if ($gen{'html'}) { + print "generating $fileroot.html...\n"; &gen_html(); } + if ($gen{'tex'} || $gen{'latex'}) { + print "generating $fileroot.tex...\n"; &gen_latex(); } + if ($gen{'nroff'}) { + print "generating $fileroot.nroff...\n"; &gen_nroff(); } + if ($gen{'txt'} || $gen{'ascii'}) { + print "generating $fileroot.txt...\n"; &gen_ascii(); } + if ($gen{'ps'}) { + print "generating $fileroot.ps...\n"; &gen_ps(); } + } + else { + print "Input file $infiles[$i] not found\n"; + } +} + +exit 0; +