freebsd-dev/contrib/texinfo/makeinfo/index.c
2002-07-22 16:52:57 +00:00

860 lines
24 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* index.c -- indexing for Texinfo.
$Id: index.c,v 1.25 2002/03/19 14:43:04 karl Exp $
Copyright (C) 1998, 99, 2002 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "system.h"
#include "index.h"
#include "lang.h"
#include "macro.h"
#include "toc.h"
#include "xml.h"
/* One entry of an index.  The entries that belong to one index form a
   singly linked list through NEXT; index_add_arg pushes every new entry
   onto the front of that list. */
typedef struct index_elt
{
struct index_elt *next; /* Next entry of the same list, or NULL. */
char *entry; /* The index entry itself, after expansion. NULL until
sort_index fills it in. */
char *entry_text; /* The original, non-expanded entry text. */
char *node; /* The node from whence it came. free_index deliberately
leaves this string alone. */
int code; /* Nonzero means add `@code{...}' when
printing this element. */
int defining_line; /* Line number where this entry was written. */
char *defining_file; /* Source file for defining_line; a private copy
made by index_add_arg. */
} INDEX_ELT;
/* A list of short-names for each index.
There are two indices into the the_indices array.
* read_index is the index that points to the list of index
entries that we will find if we ask for the list of entries for
this name.
* write_index is the index that points to the list of index entries
that we will add new entries to.
Initially, read_index and write_index are the same, but the
@syncodeindex and @synindex commands can change the list we add
entries to.
For example, after the commands
@cindex foo
@defindex ii
@synindex cp ii
@cindex bar
the cp index will contain the entry `foo', and the new ii
index will contain the entry `bar'. This is consistent with the
way texinfo.tex handles the same situation.
In addition, for each index, it is remembered whether that index is
a code index or not. Code indices have @code{} inserted around the
first word when they are printed with printindex. */
typedef struct
{
char *name; /* short name of the index, e.g. `cp'; owned by this entry */
int read_index; /* index entries for `name' */
int write_index; /* store index entries here, @synindex can change it */
int code; /* nonzero if this is a code index; copied into each new entry */
} INDEX_ALIST;
/* The table of known index names, one slot per index. A slot is NULL
after undefindex has released it; defindex reuses such slots. */
INDEX_ALIST **name_index_alist = NULL;
/* An array of pointers. Each one is for a different index. The
"synindex" command changes which array slot is pointed to by a
given "index". Each slot holds the head of a list of INDEX_ELT, or
NULL when that index has no entries. */
INDEX_ELT **the_indices = NULL;
/* The number of slots in use in name_index_alist and the_indices.
It only grows; slots released by undefindex stay counted. */
int defined_indices = 0;
/* Stuff for defining commands on the fly: the table filled by
define_user_command and the number of commands stored in it. */
COMMAND **user_command_array = NULL;
int user_command_array_len = 0;
/* How to compare index entries for sorting. May be set to strcoll
(sort_index does so when HAVE_STRCOLL is defined, the document
language is not English and LANG is neither "C" nor "POSIX"). */
int (*index_compare_fn) () = strcasecmp;
/* Find which element in the known list of indices has this name.
Returns -1 if NAME isn't found. */
static int
find_index_offset (name)
char *name;
{
int i;
for (i = 0; i < defined_indices; i++)
if (name_index_alist[i] && STREQ (name, name_index_alist[i]->name))
return i;
return -1;
}
/* Return the (name . index) entry for the index called NAME, or NULL
   if no index of that name is currently defined.  */
INDEX_ALIST *
find_index (name)
     char *name;
{
  int slot = find_index_offset (name);

  if (slot < 0)
    return NULL;

  return name_index_alist[slot];
}
/* User-defined commands, which happens only from user-defined indexes.
Used to initialize the builtin indices, too. */
void
define_user_command (name, proc, needs_braces_p)
char *name;
COMMAND_FUNCTION *proc;
int needs_braces_p;
{
int slot = user_command_array_len;
user_command_array_len++;
if (!user_command_array)
user_command_array = xmalloc (1 * sizeof (COMMAND *));
user_command_array = xrealloc (user_command_array,
(1 + user_command_array_len) * sizeof (COMMAND *));
user_command_array[slot] = xmalloc (sizeof (COMMAND));
user_command_array[slot]->name = xstrdup (name);
user_command_array[slot]->proc = proc;
user_command_array[slot]->argument_in_braces = needs_braces_p;
}
/* Please release me, let me go... */
static void
free_index (index)
INDEX_ELT *index;
{
INDEX_ELT *temp;
while ((temp = index))
{
free (temp->entry);
free (temp->entry_text);
/* Do not free the node, because we already freed the tag table,
which freed all the node names. */
/* free (temp->node); */
index = index->next;
free (temp);
}
}
/* Flush an index by name. This will delete the list of entries that
would be written by a @printindex command for this index. */
static void
undefindex (name)
char *name;
{
int i;
int which = find_index_offset (name);
/* The index might have already been freed if this was the target of
an @synindex. */
if (which < 0 || !name_index_alist[which])
return;
i = name_index_alist[which]->read_index;
free_index (the_indices[i]);
the_indices[i] = NULL;
free (name_index_alist[which]->name);
free (name_index_alist[which]);
name_index_alist[which] = NULL;
}
/* Add the arguments to the current index command to the index NAME.
   The rest of the input line is read as the entry text and pushed onto
   the front of the list that NAME currently writes to (its
   write_index).  If NAME is not a defined index an error is reported
   and nothing is recorded.  In XML mode the entry text is additionally
   handed to xml_insert_indexterm.
   html fixxme generate specific html anchor */
static void
index_add_arg (name)
     char *name;
{
  int which;
  char *index_entry;
  INDEX_ALIST *tem;

  tem = find_index (name);
  which = tem ? tem->write_index : -1;

  if (macro_expansion_output_stream && !executing_string)
    append_to_expansion_output (input_text_offset + 1);

  get_rest_of_line (0, &index_entry);
  ignore_blank_line ();

  if (macro_expansion_output_stream && !executing_string)
    {
      /* Room for the entry, the newline and the terminating NUL.  */
      char *index_line = xmalloc (strlen (index_entry) + 2);
      sprintf (index_line, "%s\n", index_entry);
      me_execute_string_keep_state (index_line, NULL);
      free (index_line);
    }

  if (which < 0)
    line_error (_("Unknown index `%s'"), name);
  else
    {
      INDEX_ELT *new = xmalloc (sizeof (INDEX_ELT));
      new->next = the_indices[which];
      /* The element takes ownership of the entry text.  */
      new->entry_text = index_entry;
      /* The expanded form is only computed when the index is sorted.  */
      new->entry = NULL;
      new->node = current_node ? current_node : xstrdup ("");
      new->code = tem->code;
      new->defining_line = line_number - 1;
      /* We need to make a copy since input_filename may point to
         something that goes away, for example, inside a macro.
         (see the findexerr test).  */
      new->defining_file = xstrdup (input_filename);
      the_indices[which] = new;
#if 0
      /* The index breaks if there are colons in the entry.
         -- This is true, but it's too painful to force changing index
         entries to use `colon', and too confusing for users.  The real
         fix is to change Info support to support arbitrary characters
         in node names, and we're not ready to do that.  --karl,
         19mar02.  */
      if (strchr (new->entry_text, ':'))
        warning (_("Info cannot handle `:' in index entry `%s'"),
                 new->entry_text);
#endif
    }

  if (xml)
    xml_insert_indexterm (index_entry, name);

  /* When the index is unknown nobody took ownership of the entry text.
     Release it only now: freeing it before the XML call above would
     hand xml_insert_indexterm a dangling pointer.  */
  if (which < 0)
    free (index_entry);
}
/* The function which user defined index commands call.  The index name
   is derived from the name of the command being executed by chopping
   off as many trailing characters as "index" is long (the commands are
   created as NAME followed by "index").  */
static void
gen_index ()
{
  char *name = xstrdup (command);
  size_t name_len = strlen (name);
  size_t suffix_len = strlen ("index");

  if (name_len >= suffix_len)
    name[name_len - suffix_len] = 0;

  index_add_arg (name);
  free (name);
}
/* Define an index known as NAME. We assign the slot number.
If CODE is nonzero, make this a code index. */
static void
defindex (name, code)
char *name;
int code;
{
int i, slot;
/* If it already exists, flush it. */
undefindex (name);
/* Try to find an empty slot. */
slot = -1;
for (i = 0; i < defined_indices; i++)
if (!name_index_alist[i])
{
slot = i;
break;
}
if (slot < 0)
{ /* No such luck. Make space for another index. */
slot = defined_indices;
defined_indices++;
name_index_alist = (INDEX_ALIST **)
xrealloc (name_index_alist, (1 + defined_indices)
* sizeof (INDEX_ALIST *));
the_indices = (INDEX_ELT **)
xrealloc (the_indices, (1 + defined_indices) * sizeof (INDEX_ELT *));
}
/* We have a slot. Start assigning. */
name_index_alist[slot] = xmalloc (sizeof (INDEX_ALIST));
name_index_alist[slot]->name = xstrdup (name);
name_index_alist[slot]->read_index = slot;
name_index_alist[slot]->write_index = slot;
name_index_alist[slot]->code = code;
the_indices[slot] = NULL;
}
/* Define an index NAME, implicitly @code if CODE is nonzero.  This also
   registers the user command NAME followed by "index", which adds
   entries to the new index through gen_index.  */
static void
top_defindex (name, code)
     char *name;
     int code;
{
  /* Room for NAME, the suffix and the terminating NUL.  */
  char *command_name = xmalloc (1 + strlen (name) + strlen ("index"));

  strcpy (command_name, name);
  strcat (command_name, "index");

  define_user_command (command_name, gen_index, 0);
  defindex (name, code);

  free (command_name);
}
/* Set up predefined indices.  On the first call the two index tables
   are created; on later calls every index left over from earlier input
   is discarded.  Either way the six default indices are then defined
   afresh.  */
void
init_indices ()
{
  /* The default indices.  cp is the only non-code index.  */
  static struct { char *name; int code; } builtin[] =
    {
      { "cp", 0 },
      { "fn", 1 },
      { "ky", 1 },
      { "pg", 1 },
      { "tp", 1 },
      { "vr", 1 }
    };
  int i;

  /* Create the default data structures.  */

  /* Initialize data space.  */
  if (!the_indices)
    {
      the_indices = xmalloc ((1 + defined_indices) * sizeof (INDEX_ELT *));
      the_indices[defined_indices] = NULL;

      name_index_alist = xmalloc ((1 + defined_indices)
                                  * sizeof (INDEX_ALIST *));
      name_index_alist[defined_indices] = NULL;
    }

  /* If there were existing indices, get rid of them now.  */
  i = 0;
  while (i < defined_indices)
    {
      undefindex (name_index_alist[i]->name);

      if (name_index_alist[i])
        { /* Suppose we're called with two input files, and the first
             does a @synindex pg cp.  Then, when we get here to start
             the second file, the "pg" element won't get freed by
             undefindex (because it's pointing to "cp").  So free it
             here; otherwise, when we try to define the pg index again
             just below, it will still point to cp.  */
          free (name_index_alist[i]->name);
          free (name_index_alist[i]);
          name_index_alist[i] = NULL;
        }

      i++;
    }

  /* Add the default indices.  */
  for (i = 0; i < (int) (sizeof (builtin) / sizeof (builtin[0])); i++)
    top_defindex (builtin[i].name, builtin[i].code);
}
/* Map the index called NAME to the slot of the_indices that its entries
   are read from.  Return -1 if there is no such index.  */
int
translate_index (name)
     char *name;
{
  INDEX_ALIST *alist = find_index (name);

  return alist ? alist->read_index : -1;
}
/* Return the head of the list of entries which belongs to NAME.  The
   result is NULL for a defined index without entries, and the sentinel
   (INDEX_ELT *) -1 when NAME is not a defined index.  */
INDEX_ELT *
index_list (name)
     char *name;
{
  int slot = translate_index (name);

  if (slot >= 0)
    return the_indices[slot];

  return (INDEX_ELT *) -1;
}
/* Define a new index command.  The rest of the input line is the name
   of the index; CODE nonzero makes it a code index.  Defining a name
   that already exists is reported as an error and changes nothing.  */
static void
gen_defindex (code)
     int code;
{
  char *name;

  get_rest_of_line (0, &name);

  if (!find_index (name))
    {
      /* sizeof counts the terminating NUL of "index" for us.  */
      char *command_name = xmalloc (strlen (name) + sizeof ("index"));

      sprintf (command_name, "%sindex", name);
      define_user_command (command_name, gen_index, 0);
      defindex (name, code);
      free (command_name);
    }
  else
    line_error (_("Index `%s' already exists"), name);

  free (name);
}
/* Define the index named on the rest of the line as an ordinary
   (non-code) index.  */
void
cm_defindex ()
{
  int code_index_p = 0;

  gen_defindex (code_index_p);
}
/* Define the index named on the rest of the line as a code index.  */
void
cm_defcodeindex ()
{
  int code_index_p = 1;

  gen_defindex (code_index_p);
}
/* Expects 2 args, on the same line.  Both are index abbreviations.
   Make the first one be a synonym for the second one: from now on
   entries added to the first index are stored wherever the second
   index currently stores its entries.  Only write_index changes; the
   list that is read for the first index stays the same.  */
void
cm_synindex ()
{
  char *redirected_name, *destination_name;
  int redirected, destination;

  skip_whitespace ();
  get_until_in_line (0, " ", &redirected_name);
  redirected = find_index_offset (redirected_name);

  skip_whitespace ();
  get_until_in_line (0, " ", &destination_name);
  destination = find_index_offset (destination_name);

  if (destination >= 0 && redirected >= 0)
    {
      name_index_alist[redirected]->write_index
        = name_index_alist[destination]->write_index;
    }
  else
    {
      line_error (_("Unknown index `%s' and/or `%s' in @synindex"),
                  redirected_name, destination_name);
    }

  free (redirected_name);
  free (destination_name);
}
/* Add the rest of the line to the `pg' index.  (Pinhead index.)  */
void
cm_pindex ()
{
  char *index_name = "pg";

  index_add_arg (index_name);
}
/* Add the rest of the line to the `vr' index.  (Variable index.)  */
void
cm_vindex ()
{
  char *index_name = "vr";

  index_add_arg (index_name);
}
/* Add the rest of the line to the `ky' index.  (Key index.)  */
void
cm_kindex ()
{
  char *index_name = "ky";

  index_add_arg (index_name);
}
/* Add the rest of the line to the `cp' index.  (Concept index.)  */
void
cm_cindex ()
{
  char *index_name = "cp";

  index_add_arg (index_name);
}
/* Add the rest of the line to the `fn' index.  (Function index.)  */
void
cm_findex ()
{
  char *index_name = "fn";

  index_add_arg (index_name);
}
/* Add the rest of the line to the `tp' index.  (Data Type index.)  */
void
cm_tindex ()
{
  char *index_name = "tp";

  index_add_arg (index_name);
}
/* Comparison callback handed to qsort by sort_index.  ELEMENT1 and
   ELEMENT2 point at two slots of the array being sorted; the elements
   are ordered by their expanded entry text using index_compare_fn.  */
int
index_element_compare (element1, element2)
     INDEX_ELT **element1, **element2;
{
  INDEX_ELT *left = *element1;
  INDEX_ELT *right = *element2;

  return index_compare_fn (left->entry, right->entry);
}
/* Force all index entries to be unique. */
void
make_index_entries_unique (array, count)
INDEX_ELT **array;
int count;
{
int i, j;
INDEX_ELT **copy;
int counter = 1;
copy = xmalloc ((1 + count) * sizeof (INDEX_ELT *));
for (i = 0, j = 0; i < count; i++)
{
if (i == (count - 1)
|| array[i]->node != array[i + 1]->node
|| !STREQ (array[i]->entry, array[i + 1]->entry))
copy[j++] = array[i];
else
{
free (array[i]->entry);
free (array[i]->entry_text);
free (array[i]);
}
}
copy[j] = NULL;
/* Now COPY contains only unique entries. Duplicated entries in the
original array have been freed. Replace the current array with
the copy, fixing the NEXT pointers. */
for (i = 0; copy[i]; i++)
{
copy[i]->next = copy[i + 1];
/* Fix entry names which are the same. They point to different nodes,
so we make the entry name unique. */
if (copy[i+1]
&& STREQ (copy[i]->entry, copy[i + 1]->entry)
&& !html)
{
char *new_entry_name;
new_entry_name = xmalloc (10 + strlen (copy[i]->entry));
sprintf (new_entry_name, "%s <%d>", copy[i]->entry, counter);
free (copy[i]->entry);
copy[i]->entry = new_entry_name;
counter++;
}
else
counter = 1;
array[i] = copy[i];
}
array[i] = NULL;
/* Free the storage used only by COPY. */
free (copy);
}
/* Sort the index passed in INDEX, returning an array of
   pointers to elements.  The array is terminated with a NULL
   pointer.  We call qsort because it's supposed to be fast.
   I think this looks bad.

   Every element gets its `entry' (re)computed by expanding its
   entry_text, wrapped in @code when the element's code flag is set.
   After sorting, make_index_entries_unique removes duplicates and
   relinks the NEXT pointers in sorted order, so the list INDEX pointed
   to is modified.  The caller owns the returned array and frees it;
   the elements remain owned by the index.  */
INDEX_ELT **
sort_index (index)
     INDEX_ELT *index;
{
  INDEX_ELT **array;
  INDEX_ELT *temp;
  int count = 0;
  int save_line_number = line_number;
  char *save_input_filename = input_filename;
  int save_html = html;

  /* Pretend we are in non-HTML mode, for the purpose of getting the
     expanded index entry that lacks any markup and other HTML escape
     characters which could produce a wrong sort order.  */
  /* fixme: html: this still causes some markup, such as non-ASCII
     characters @AE{} etc., to sort incorrectly.  */
  html = 0;

  for (temp = index; temp; temp = temp->next)
    count++;

  /* We have the length.  Make an array.  */
  array = xmalloc ((count + 1) * sizeof (INDEX_ELT *));

  count = 0;
  for (temp = index; temp; temp = temp->next)
    {
      array[count++] = temp;

      /* Set line number and input filename to the source line for this
         index entry, as this expansion finds any errors.  */
      line_number = temp->defining_line;
      input_filename = temp->defining_file;

      /* An element that went through an earlier sort still holds the
         expansion made then.  Release it before it is overwritten;
         entry is NULL for a fresh element, which free accepts.  */
      free (temp->entry);

      /* If this particular entry should be printed as a "code" index,
         then expand it as @code{entry}, i.e. as in fixed-width font.  */
      temp->entry = expansion (temp->entry_text, temp->code);
    }
  array[count] = NULL;    /* terminate the array.  */

  line_number = save_line_number;
  input_filename = save_input_filename;
  html = save_html;

#ifdef HAVE_STRCOLL
  /* This is not perfect.  We should set (then restore) the locale to the
     documentlanguage, so strcoll operates according to the document's
     locale, not the user's.  For now, I'm just going to assume that
     those few new documents which use @documentlanguage will be
     processed in the appropriate locale.  In any case, don't use
     strcoll in the C (aka POSIX) locale, that is the ASCII ordering.  */
  if (language_code != en)
    {
      char *lang_env = getenv ("LANG");
      if (lang_env && !STREQ (lang_env, "C") && !STREQ (lang_env, "POSIX"))
        index_compare_fn = strcoll;
    }
#endif /* HAVE_STRCOLL */

  /* Sort the array.  */
  qsort (array, count, sizeof (INDEX_ELT *), index_element_compare);
  make_index_entries_unique (array, count);
  return array;
}
/* Nonzero means that we are in the middle of printing an index.
cm_printindex sets it before sorting the entries and clears it once
all of them have been output. */
int printing_index = 0;
/* Takes one arg, a short name of an index to print.
Outputs a menu of the sorted elements of the index. */
void
cm_printindex ()
{
if (xml && !docbook)
{
char *index_name;
get_rest_of_line (0, &index_name);
xml_insert_element (PRINTINDEX, START);
insert_string (index_name);
xml_insert_element (PRINTINDEX, END);
}
else
{
int item;
INDEX_ELT *index;
INDEX_ELT *last_index = 0;
INDEX_ELT **array;
char *index_name;
unsigned line_length;
char *line;
int saved_inhibit_paragraph_indentation = inhibit_paragraph_indentation;
int saved_filling_enabled = filling_enabled;
int saved_line_number = line_number;
char *saved_input_filename = input_filename;
close_paragraph ();
get_rest_of_line (0, &index_name);
index = index_list (index_name);
if (index == (INDEX_ELT *)-1)
{
line_error (_("Unknown index `%s' in @printindex"), index_name);
free (index_name);
return;
}
/* Do this before sorting, so execute_string is in the good environment */
if (xml && docbook)
xml_begin_index ();
/* Do this before sorting, so execute_string in index_element_compare
will give the same results as when we actually print. */
printing_index = 1;
filling_enabled = 0;
inhibit_paragraph_indentation = 1;
xml_sort_index = 1;
array = sort_index (index);
xml_sort_index = 0;
close_paragraph ();
if (html)
add_word ("<ul compact>");
else if (!no_headers && !docbook)
add_word ("* Menu:\n\n");
me_inhibit_expansion++;
/* This will probably be enough. */
line_length = 100;
line = xmalloc (line_length);
for (item = 0; (index = array[item]); item++)
{
/* A pathological document might have an index entry outside of any
node. Don't crash; try using the section name instead. */
char *index_node = index->node;
line_number = index->defining_line;
input_filename = index->defining_file;
if ((!index_node || !*index_node) && html)
index_node = toc_find_section_of_node (index_node);
if (!index_node || !*index_node)
{
line_error (_("Entry for index `%s' outside of any node"),
index_name);
if (html || !no_headers)
index_node = _("(outside of any node)");
}
if (html)
/* fixme: html: we should use specific index anchors pointing
to the actual location of the indexed position (but then we
have to find something to wrap the anchor around). */
{
if (last_index
&& STREQ (last_index->entry_text, index->entry_text))
add_word (", "); /* Don't repeat the previous entry. */
else
{
/* In the HTML case, the expanded index entry is not
good for us, since it was expanded for non-HTML mode
inside sort_index. So we need to HTML-escape and
expand the original entry text here. */
char *escaped_entry = xstrdup (index->entry_text);
char *expanded_entry;
/* expansion() doesn't HTML-escape the argument, so need
to do it separately. */
escaped_entry = escape_string (escaped_entry);
expanded_entry = expansion (escaped_entry, index->code);
add_word_args ("\n<li>%s: ", expanded_entry);
free (escaped_entry);
free (expanded_entry);
}
add_word ("<a href=\"");
if (index->node && *index->node)
{
/* Make sure any non-macros in the node name are expanded. */
in_fixed_width_font++;
index_node = expansion (index_node, 0);
in_fixed_width_font--;
add_anchor_name (index_node, 1);
add_word_args ("\">%s</a>", index_node);
free (index_node);
}
else if (STREQ (index_node, _("(outside of any node)")))
{
add_anchor_name (index_node, 1);
add_word_args ("\">%s</a>", index_node);
}
else
/* If we use the section instead of the (missing) node, then
index_node already includes all we need except the #. */
add_word_args ("#%s</a>", index_node);
}
else if (xml && docbook)
{
xml_insert_indexentry (index->entry, index_node);
}
else
{
unsigned new_length = strlen (index->entry);
if (new_length < 50) /* minimum length used below */
new_length = 50;
new_length += strlen (index_node) + 7; /* * : .\n\0 */
if (new_length > line_length)
{
line_length = new_length;
line = xrealloc (line, line_length);
}
/* Print the entry, nicely formatted. We've already
expanded any commands in index->entry, including any
implicit @code. Thus, can't call execute_string, since
@@ has turned into @. */
if (!no_headers)
{
sprintf (line, "* %-37s ", index->entry);
line[2 + strlen (index->entry)] = ':';
insert_string (line);
/* Make sure any non-macros in the node name are expanded. */
in_fixed_width_font++;
execute_string ("%s.\n", index_node);
in_fixed_width_font--;
}
else
{
/* With --no-headers, the @node lines are gone, so
there's little sense in referring to them in the
index. Instead, output the number or name of the
section that corresponds to that node. */
char *section_name = toc_find_section_of_node (index_node);
sprintf (line, "%-*s ", number_sections ? 50 : 1, index->entry);
line[strlen (index->entry)] = ':';
insert_string (line);
if (section_name)
{
int idx = 0;
unsigned ref_len = strlen (section_name) + 30;
if (ref_len > line_length)
{
line_length = ref_len;
line = xrealloc (line, line_length);
}
if (number_sections)
{
while (section_name[idx]
&& (isdigit (section_name[idx])
|| (idx && section_name[idx] == '.')))
idx++;
}
if (idx)
sprintf (line, " See %.*s.\n", idx, section_name);
else
sprintf (line, "\n See ``%s''.\n", section_name);
insert_string (line);
}
else
{
insert_string (" "); /* force a blank */
execute_string ("See node %s.\n", index_node);
}
}
}
/* Prevent `output_paragraph' from growing to the size of the
whole index. */
flush_output ();
last_index = index;
}
free (line);
free (index_name);
me_inhibit_expansion--;
printing_index = 0;
free (array);
close_single_paragraph ();
filling_enabled = saved_filling_enabled;
inhibit_paragraph_indentation = saved_inhibit_paragraph_indentation;
input_filename = saved_input_filename;
line_number = saved_line_number;
if (html)
add_word ("</ul>");
else if (xml && docbook)
xml_end_index ();
}
}