diff3: Clean up printing of ranges for edscript output

Replace the edscript code that tracked and printed lines using byte
offsets with code that can work from line offsets.

This tidies up the reduces duplication in the edscript output code. It
also fixes the usage of the de struct so that it only tracks diffs as
line offsets rather than the usage changing from line offsets to byte
offsets during the lifetime of diff3.

Large files with large numbers of ranges will probably suffer in
performance here, but as we don't use diff3 yet this isn't a regression.
Include a warning for future hackers so they have a place to start
hacking from.

Reviewed by:	pstef
Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D34941
This commit is contained in:
Tom Jones 2022-04-19 14:47:07 +01:00
parent 8c47d8f538
commit 2184ca3f19

View File

@ -110,10 +110,9 @@ static struct diff *d13;
static struct diff *d23;
/*
* "de" is used to gather editing scripts. These are later spewed out in
* reverse order. Its first element must be all zero, the "new" component
* of "de" contains line positions or byte positions depending on when you
* look (!?). Array overlap indicates which sections in "de" correspond to
* lines that are different in all three files.
* reverse order. Its first element must be all zero, the "old" and "new"
* components of "de" contain line positions. Array overlap indicates which
* sections in "de" correspond to lines that are different in all three files.
*/
static struct diff *de;
static char *overlap;
@ -144,6 +143,7 @@ static void repos(int);
static void edscript(int) __dead2;
static void increase(void);
static void usage(void) __dead2;
static void printrange(FILE *, struct range *);
enum {
DIFFPROG_OPT,
@ -507,19 +507,46 @@ edit(struct diff *diff, bool dup, int j)
overlapcnt++;
de[j].old.from = diff->old.from;
de[j].old.to = diff->old.to;
de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL);
de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL);
de[j].new.from = diff->new.from;
de[j].new.to = diff->new.to;
return (j);
}
static void
printrange(FILE *p, struct range *r)
{
char *line = NULL;
size_t len = 0;
int i = 1;
ssize_t rlen = 0;
/* We haven't been asked to print anything */
if (r->from == r->to)
return;
if (r->from > r->to)
errx(EXIT_FAILURE, "invalid print range");
/*
* XXX-THJ: We read through all of the file for each range printed.
* This duplicates work and will probably impact performance on large
* files with lots of ranges.
*/
fseek(p, 0L, SEEK_SET);
while ((rlen = getline(&line, &len, p)) > 0) {
if (i >= r->from)
printf("%s", line);
if (++i > r->to - 1)
break;
}
free(line);
}
/* regurgitate */
static void
edscript(int n)
{
int k;
bool delete;
size_t j;
char block[BUFSIZ];
for (; n > 0; n--) {
delete = (de[n].new.from == de[n].new.to);
@ -527,34 +554,9 @@ edscript(int n)
prange(&de[n].old, delete);
} else {
printf("%da\n", de[n].old.to - 1);
if (Aflag) {
printf("%s\n", f2mark);
fseek(fp[1], de[n].old.from, SEEK_SET);
for (k = de[n].old.to - de[n].old.from; k > 0; k -= j) {
j = k > BUFSIZ ? BUFSIZ : k;
if (fread(block, 1, j, fp[1]) != j)
errx(2, "logic error");
fwrite(block, 1, j, stdout);
}
printf("\n");
}
printf("=======\n");
}
fseek(fp[2], (long)de[n].new.from, SEEK_SET);
for (k = de[n].new.to - de[n].new.from; k > 0; k -= j) {
size_t r;
j = k > BUFSIZ ? BUFSIZ : k;
r = fread(block, 1, j, fp[2]);
if (r == 0) {
if (feof(fp[2]))
break;
errx(2, "logic error");
}
if (r != j)
j = r;
(void)fwrite(block, 1, j, stdout);
}
printrange(fp[2], &de[n].new);
if (!oflag || !overlap[n]) {
if (!delete)
printf(".\n");