Change the semantics of -i (in-place editing) so that it treats

each file independently from other files.  The new semantics are
desired in most practical cases, e.g., deleting lines 5-9
from each file.

Keep the previous semantics of -i under a new option, -I, which
uses a single continuous address space covering all files to edit
in-place -- those semantics are too cool to just drop.

Add regression tests for -i and -I.

Approved by:	dds
Compared with:	GNU sed
Discussed on:	-hackers
MFC after:	2 weeks
This commit is contained in:
Yaroslav Tykhiy 2007-04-21 01:21:36 +00:00
parent 0e5179e441
commit f6703c9c0a
5 changed files with 118 additions and 12 deletions

View File

@ -2,7 +2,7 @@
REGRESSION_START($1)
echo '1..16'
echo '1..20'
REGRESSION_TEST(`G', `sed G < regress.in')
REGRESSION_TEST(`P', `sed P < regress.in')
@ -28,6 +28,40 @@ foo
REGRESSION_TEST(`b2a', `sed ''`2,3b
1,2d''` < regress.in')
`
# inplace_test EXPR
#
# Run the sed script EXPR over a set of generated files with -i and
# with -I, and compare the edited files against reference output that
# was produced without in-place editing.
# Returns 0 if every comparison matches, 1 otherwise.
inplace_test()
{
expr="$1"
rc=0
ns=$(jot 5)
ins= outs= _ins=
# For each n, write the same generated lines to lines.in.$n and
# lines._in.$n, and store the result of running sed on those lines
# alone in lines.out.$n.  The three name lists are accumulated for
# the later sed runs and for cleanup.
for n in $ns; do
jot -w "l${n}_%d" 9 | tee lines.in.$n lines._in.$n | \
sed "$expr" > lines.out.$n
ins="$ins lines.in.$n"
outs="$outs lines.out.$n"
_ins="$_ins lines._in.$n"
done
# Reference for -I: all input files processed in one sed run.
sed "$expr" $_ins > lines.out
# Edit one set of copies with -i and the other with -I, both with an
# empty backup extension.
sed -i "" "$expr" $ins
sed -I "" "$expr" $_ins
# After -i, each file must equal its own single-file reference.
for n in $ns; do
diff -u lines.out.$n lines.in.$n || rc=1
done
# After -I, the concatenated files must equal the one-run reference.
cat $_ins | diff -u lines.out - || rc=1
rm -f $ins $outs $_ins lines.out
return $rc
}
'
REGRESSION_TEST_FREEFORM(`inplace1', `inplace_test 3,6d')
REGRESSION_TEST_FREEFORM(`inplace2', `inplace_test 8,30d')
REGRESSION_TEST_FREEFORM(`inplace3', `inplace_test 20,99d')
REGRESSION_TEST_FREEFORM(`inplace4', `inplace_test "{;{;8,30d;};}"')
REGRESSION_TEST(`hanoi', `echo ":abcd: : :" | sed -f hanoi.sed')
REGRESSION_TEST(`math', `echo "4+7*3+2^7/3" | sed -f math.sed')

View File

@ -52,4 +52,5 @@ char *cu_fgets(char *, int, int *);
int mf_fgets(SPACE *, enum e_spflag);
int lastline(void);
void process(void);
void resetranges(void);
char *strregerror(int, regex_t *);

View File

@ -101,9 +101,12 @@ int aflag, eflag, nflag;
int rflags = 0;
static int rval; /* Exit status */
static int ispan; /* Whether inplace editing spans across files */
/*
* Current file and line number; line numbers restart across compilation
* units, but span across input files.
* units, but span across input files. The latter is optional if editing
* in place.
*/
const char *fname; /* File name. */
const char *outfname; /* Output file name */
@ -127,11 +130,15 @@ main(int argc, char *argv[])
fflag = 0;
inplace = NULL;
while ((c = getopt(argc, argv, "Eae:f:i:ln")) != -1)
while ((c = getopt(argc, argv, "EI:ae:f:i:ln")) != -1)
switch (c) {
case 'E':
rflags = REG_EXTENDED;
break;
case 'I':
inplace = optarg;
ispan = 1; /* span across input files */
break;
case 'a':
aflag = 1;
break;
@ -149,6 +156,7 @@ main(int argc, char *argv[])
break;
case 'i':
inplace = optarg;
ispan = 0; /* don't span across input files */
break;
case 'l':
if(setlinebuf(stdout) != 0)
@ -307,7 +315,7 @@ mf_fgets(SPACE *sp, enum e_spflag spflag)
/* stdin? */
if (files->fname == NULL) {
if (inplace != NULL)
errx(1, "-i may not be used with stdin");
errx(1, "-I or -i may not be used with stdin");
infile = stdin;
fname = "stdin";
outfile = stdout;
@ -380,6 +388,10 @@ mf_fgets(SPACE *sp, enum e_spflag spflag)
fchown(fileno(outfile), sb.st_uid, sb.st_gid);
fchmod(fileno(outfile), sb.st_mode & ALLPERMS);
outfname = tmpfname;
if (!ispan) {
linenum = 0;
resetranges();
}
} else {
outfile = stdout;
outfname = "stdout";
@ -448,7 +460,7 @@ lastline(void)
{
int ch;
if (files->next != NULL)
if (files->next != NULL && (inplace == NULL || ispan))
return (0);
if ((ch = getc(infile)) == EOF)
return (1);

View File

@ -316,6 +316,19 @@ applies(struct s_command *cp)
return (cp->nonsel ? ! r : r);
}
/*
 * Walk the compiled program starting at prog and clear the inrange
 * marker of every command that has a second address (a2) set.
 */
void
resetranges(void)
{
	struct s_command *cmd = prog;

	while (cmd) {
		if (cmd->a2)
			cmd->inrange = 0;

		/* A '{' command is followed through u.c, any other through next. */
		if (cmd->code == '{')
			cmd = cmd->u.c;
		else
			cmd = cmd->next;
	}
}
/*
* substitute --
* Do substitutions in the pattern space. Currently, we build a

View File

@ -46,6 +46,7 @@
.Op Fl Ealn
.Op Fl e Ar command
.Op Fl f Ar command_file
.Op Fl I Ar extension
.Op Fl i Ar extension
.Op Ar
.Sh DESCRIPTION
@ -95,7 +96,7 @@ Append the editing commands found in the file
.Ar command_file
to the list of commands.
The editing commands should each be listed on a separate line.
.It Fl i Ar extension
.It Fl I Ar extension
Edit files in-place, saving backups with the specified
.Ar extension .
If a zero-length
@ -105,6 +106,36 @@ It is not recommended to give a zero-length
.Ar extension
when in-place editing files, as you risk corruption or partial content
in situations where disk space is exhausted, etc.
.Pp
Note that in-place editing with
.Fl I
still takes place in a single continuous line address space covering
all files, although each file preserves its individuality instead of
forming one output stream.
The line counter is never reset between files, address ranges can span
file boundaries, and the
.Dq $
address matches only the last line of the last file.
(See
.Sx "Sed Addresses" . )
That can lead to unexpected results in many cases of in-place editing,
where using
.Fl i
is desired.
.It Fl i Ar extension
Edit files in-place similarly to
.Fl I ,
but treat each file independently from other files.
In particular, line numbers in each file start at 1,
the
.Dq $
address matches the last line of the current file,
and address ranges are limited to the current file.
(See
.Sx "Sed Addresses" . )
The net result is as though each file were edited by a separate
.Nm
instance.
.It Fl l
Make output line buffered.
.It Fl n
@ -140,13 +171,28 @@ Some of the functions use a
.Em "hold space"
to save all or part of the pattern space for subsequent retrieval.
.Sh "Sed Addresses"
An address is not required, but if specified must be a number (that counts
An address is not required, but if specified must have one of the
following formats:
.Bl -bullet -offset indent
.It
a number that counts
input lines
cumulatively across input files), a dollar
cumulatively across input files (or in each file independently
if a
.Fl i
option is in effect);
.It
a dollar
.Pq Dq $
character that addresses the last line of input, or a context address
(which consists of a regular expression preceded and followed by a
delimiter).
character that addresses the last line of input (or the last line
of the current file if a
.Fl i
option was specified);
.It
a context address
that consists of a regular expression preceded and followed by a
delimiter.
.El
.Pp
A command line with no addresses selects every pattern space.
.Pp
@ -533,7 +579,7 @@ utility is expected to be a superset of the
specification.
.Pp
The
.Fl E , a
.Fl E , I , a
and
.Fl i
options are non-standard