From 2184ca3f191d8750fb56b6d50997d437646c9384 Mon Sep 17 00:00:00 2001 From: Tom Jones Date: Tue, 19 Apr 2022 14:47:07 +0100 Subject: [PATCH] diff3: Clean up printing of ranges for edscript output Replace the edscript code that tracked and printed lines using byte offsets with code that can work from line offsets. This tidies up the reduces duplication in the edscript output code. It also fixes the usage of the de struct so that it only tracks diffs as line offsets rather than the usage changing from line offsets to byte offsets during the lifetime of diff3. Large files with large numbers of ranges will probably suffer in performance here, but as we don't use diff3 yet this isn't a regression. Include a warning for future hackers so they have a place to start hacking from. Reviewed by: pstef Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D34941 --- usr.bin/diff3/diff3.c | 72 ++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/usr.bin/diff3/diff3.c b/usr.bin/diff3/diff3.c index 37aa599eaab7..396b2620d22c 100644 --- a/usr.bin/diff3/diff3.c +++ b/usr.bin/diff3/diff3.c @@ -110,10 +110,9 @@ static struct diff *d13; static struct diff *d23; /* * "de" is used to gather editing scripts. These are later spewed out in - * reverse order. Its first element must be all zero, the "new" component - * of "de" contains line positions or byte positions depending on when you - * look (!?). Array overlap indicates which sections in "de" correspond to - * lines that are different in all three files. + * reverse order. Its first element must be all zero, the "old" and "new" + * components of "de" contain line positions. Array overlap indicates which + * sections in "de" correspond to lines that are different in all three files. */ static struct diff *de; static char *overlap; @@ -144,6 +143,7 @@ static void repos(int); static void edscript(int) __dead2; static void increase(void); static void usage(void) __dead2; +static void printrange(FILE *, struct range *); enum { DIFFPROG_OPT, @@ -507,19 +507,46 @@ edit(struct diff *diff, bool dup, int j) overlapcnt++; de[j].old.from = diff->old.from; de[j].old.to = diff->old.to; - de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL); - de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL); + de[j].new.from = diff->new.from; + de[j].new.to = diff->new.to; return (j); } +static void +printrange(FILE *p, struct range *r) +{ + char *line = NULL; + size_t len = 0; + int i = 1; + ssize_t rlen = 0; + + /* We haven't been asked to print anything */ + if (r->from == r->to) + return; + + if (r->from > r->to) + errx(EXIT_FAILURE, "invalid print range"); + + /* + * XXX-THJ: We read through all of the file for each range printed. + * This duplicates work and will probably impact performance on large + * files with lots of ranges. + */ + fseek(p, 0L, SEEK_SET); + while ((rlen = getline(&line, &len, p)) > 0) { + if (i >= r->from) + printf("%s", line); + if (++i > r->to - 1) + break; + } + free(line); +} + /* regurgitate */ static void edscript(int n) { - int k; bool delete; - size_t j; - char block[BUFSIZ]; for (; n > 0; n--) { delete = (de[n].new.from == de[n].new.to); @@ -527,34 +554,9 @@ edscript(int n) prange(&de[n].old, delete); } else { printf("%da\n", de[n].old.to - 1); - if (Aflag) { - printf("%s\n", f2mark); - fseek(fp[1], de[n].old.from, SEEK_SET); - for (k = de[n].old.to - de[n].old.from; k > 0; k -= j) { - j = k > BUFSIZ ? BUFSIZ : k; - if (fread(block, 1, j, fp[1]) != j) - errx(2, "logic error"); - fwrite(block, 1, j, stdout); - } - printf("\n"); - } printf("=======\n"); } - fseek(fp[2], (long)de[n].new.from, SEEK_SET); - for (k = de[n].new.to - de[n].new.from; k > 0; k -= j) { - size_t r; - - j = k > BUFSIZ ? BUFSIZ : k; - r = fread(block, 1, j, fp[2]); - if (r == 0) { - if (feof(fp[2])) - break; - errx(2, "logic error"); - } - if (r != j) - j = r; - (void)fwrite(block, 1, j, stdout); - } + printrange(fp[2], &de[n].new); if (!oflag || !overlap[n]) { if (!delete) printf(".\n");