Mercurial > illumos > illumos-gate
changeset 13257:d3807abc6720
508 od doesn't set text domain
509 need an open source sed(1) replacement
516 libc regex misses \< and \> word delimiters
Reviewed by: gwr@nexenta.com
Reviewed by: roland.mainz@nexenta.com
Approved by: gwr@nexenta.com
author | Garrett D'Amore <garrett@nexenta.com> |
---|---|
date | Wed, 15 Dec 2010 17:12:35 -0800 |
parents | 4f23f0abcff2 |
children | feaf4b4bed62 |
files | usr/src/Makefile.lint usr/src/cmd/Makefile usr/src/cmd/mdb/tools/scripts/tigen.sh usr/src/cmd/od/od.c usr/src/cmd/sed/Makefile usr/src/cmd/sed/POSIX usr/src/cmd/sed/THIRDPARTYLICENSE usr/src/cmd/sed/THIRDPARTYLICENSE.descrip usr/src/cmd/sed/compile.c usr/src/cmd/sed/defs.h usr/src/cmd/sed/extern.h usr/src/cmd/sed/main.c usr/src/cmd/sed/misc.c usr/src/cmd/sed/process.c usr/src/cmd/sed/sed.1 usr/src/cmd/sed/sed.txt usr/src/head/regex.h usr/src/lib/libc/port/locale/engine.c usr/src/lib/libc/port/locale/regcomp.c usr/src/pkg/manifests/SUNWcs.mf usr/src/pkg/manifests/system-xopen-xcu4.mf |
diffstat | 21 files changed, 3868 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/Makefile.lint Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/Makefile.lint Wed Dec 15 17:12:35 2010 -0800 @@ -269,6 +269,7 @@ cmd/savecore \ cmd/sbdadm \ cmd/sdpadm \ + cmd/sed \ cmd/setpgrp \ cmd/smbios \ cmd/sgs \
--- a/usr/src/cmd/Makefile Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/cmd/Makefile Wed Dec 15 17:12:35 2010 -0800 @@ -357,6 +357,7 @@ scsi \ sdiff \ sdpadm \ + sed \ sendmail \ setfacl \ setmnt \
--- a/usr/src/cmd/mdb/tools/scripts/tigen.sh Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/cmd/mdb/tools/scripts/tigen.sh Wed Dec 15 17:12:35 2010 -0800 @@ -24,7 +24,6 @@ # Copyright 2004 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" # # @@ -122,7 +121,7 @@ $verbose && echo "loading terminfo for $term ... \c" >& 2 echo "static const termio_attr_t ${cterm}_attrs[] = {" - sed -n '/termio_attrs\[\] = /,/^\}/p' $termio_c | \ + sed -n '/termio_attrs\[\] = /,/^}/p' $termio_c | \ sed -n \ 's/{ "\([a-z0-9]*\)", \([A-Z_]*\),.*/\1 \2/p' | \ while read attr type; do
--- a/usr/src/cmd/od/od.c Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/cmd/od/od.c Wed Dec 15 17:12:35 2010 -0800 @@ -39,6 +39,11 @@ #define _(x) gettext(x) + +#ifndef TEXT_DOMAIN +#define TEXT_DOMAIN "SYS_TEST" +#endif + /* address format */ static char *afmt = "%07llo"; static char *cfmt = " "; @@ -551,6 +556,7 @@ input = stdin; (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); while ((c = getopt(argc, argv, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF) { switch (c) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/Makefile Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,53 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# + +# +# Builds sed(1) from the four objects listed in OBJS. The same binary +# also serves as the XPG4 variant (see the $(ROOTXPG4PROG) rule below). +# + +PROG= sed +XPG4PROG= sed +OBJS= main.o compile.o misc.o process.o +SRCS= $(OBJS:%.o=%.c) +POFILES= $(OBJS:%.o=%.po) + +include ../Makefile.cmd + +# Compile with large-file support (64-bit file offsets). +CPPFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 + +CFLAGS += $(CCVERBOSE) + +.KEEP_STATE: + + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) + +# The message catalog is the concatenation of the per-object .po files. +$(POFILE): $(POFILES) + $(RM) $@ + $(CAT) $(POFILES) > $@ + +# The XPG4 program is not built separately; it is installed as a relative +# symlink (../../bin/sed) to the regular binary. +$(ROOTXPG4PROG): + $(RM) $@ + $(LN) -s ../../bin/$(PROG) $@ + +install: all $(DIRS) $(ROOTPROG) $(ROOTXPG4PROG) + +clean: + $(RM) $(OBJS) $(POFILES) + +lint: lint_SRCS + +include ../Makefile.targ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/POSIX Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,204 @@ +# @(#)POSIX 8.1 (Berkeley) 6/6/93 +# $FreeBSD$ + +Comments on the IEEE P1003.2 Draft 12 + Part 2: Shell and Utilities + Section 4.55: sed - Stream editor + +Diomidis Spinellis <dds@doc.ic.ac.uk> +Keith Bostic <bostic@cs.berkeley.edu> + +In the following paragraphs, "wrong" usually means "inconsistent with +historic practice", as most of the following comments refer to +undocumented inconsistencies between the historical versions of sed and +the POSIX 1003.2 standard. All the comments are notes taken while +implementing a POSIX-compatible version of sed, and should not be +interpreted as official opinions or criticism towards the POSIX committee. +All uses of "POSIX" refer to section 4.55, Draft 12 of POSIX 1003.2. + + 1. 32V and BSD derived implementations of sed strip the text + arguments of the a, c and i commands of their initial blanks, + i.e. + + #!/bin/sed -f + a\ + foo\ + \ indent\ + bar + + produces: + + foo + indent + bar + + POSIX does not specify this behavior as the System V versions of + sed do not do this stripping. The argument against stripping is + that it is difficult to write sed scripts that have leading blanks + if they are stripped. The argument for stripping is that it is + difficult to write readable sed scripts unless indentation is allowed + and ignored, and leading whitespace is obtainable by entering a + backslash in front of it. This implementation follows the BSD + historic practice. + + 2. Historical versions of sed required that the w flag be the last + flag to an s command as it takes an additional argument. This + is obvious, but not specified in POSIX. + + 3. Historical versions of sed required that whitespace follow a w + flag to an s command. This is not specified in POSIX. This + implementation permits whitespace but does not require it. + + 4. 
Historical versions of sed permitted any number of whitespace + characters to follow the w command. This is not specified in + POSIX. This implementation permits whitespace but does not + require it. + + 5. The rule for the l command differs from historic practice. Table + 2-15 includes the various ANSI C escape sequences, including \\ + for backslash. Some historical versions of sed displayed two + digit octal numbers, too, not three as specified by POSIX. POSIX + is a cleanup, and is followed by this implementation. + + 6. The POSIX specification for ! does not specify that for a single + command the command must not contain an address specification + whereas the command list can contain address specifications. The + specification for ! implies that "3!/hello/p" works, and it never + has, historically. Note, + + 3!{ + /hello/p + } + + does work. + + 7. POSIX does not specify what happens with consecutive ! commands + (e.g. /foo/!!!p). Historic implementations allow any number of + !'s without changing the behaviour. (It seems logical that each + one might reverse the behaviour.) This implementation follows + historic practice. + + 8. Historic versions of sed permitted commands to be separated + by semi-colons, e.g. "sed -ne '1p;2p;3q'" printed the first + three lines of a file. This is not specified by POSIX. + Note, the ; command separator is not allowed for the commands + a, c, i, w, r, :, b, t, # and at the end of a w flag in the s + command. This implementation follows historic practice and + implements the ; separator. + + 9. Historic versions of sed terminated the script if EOF was reached + during the execution of the 'n' command, i.e.: + + sed -e ' + n + i\ + hello + ' </dev/null + + did not produce any output. POSIX does not specify this behavior. + This implementation follows historic practice. + +10. Deleted. + +11. 
Historical implementations do not output the change text of a c + command in the case of an address range whose first line number + is greater than the second (e.g. 3,1). POSIX requires that the + text be output. Since the historic behavior doesn't seem to have + any particular purpose, this implementation follows the POSIX + behavior. + +12. POSIX does not specify whether address ranges are checked and + reset if a command is not executed due to a jump. The following + program will behave in different ways depending on whether the + 'c' command is triggered at the third line, i.e. will the text + be output even though line 3 of the input will never logically + encounter that command. + + 2,4b + 1,3c\ + text + + Historic implementations did not output the text in the above + example. Therefore it was believed that a range whose second + address was never matched extended to the end of the input. + However, the current practice adopted by this implementation, + as well as by those from GNU and SUN, is as follows: The text + from the 'c' command still isn't output because the second address + isn't actually matched; but the range is reset after all if its + second address is a line number. In the above example, only the + first line of the input will be deleted. + +13. Historical implementations allow an output suppressing #n at the + beginning of -e arguments as well as in a script file. POSIX + does not specify this. This implementation follows historical + practice. + +14. POSIX does not explicitly specify how sed behaves if no script is + specified. Since the sed Synopsis permits this form of the command, + and the language in the Description section states that the input + is output, it seems reasonable that it behave like the cat(1) + command. Historic sed implementations behave differently for "ls | + sed", where they produce no output, and "ls | sed -e#", where they + behave like cat. This implementation behaves like cat in both cases. + +15. 
The POSIX requirement to open all w files at the beginning makes + sed behave nonintuitively when the w commands are preceded by + addresses or are within conditional blocks. This implementation + follows historic practice and POSIX, by default, and provides the + -a option which opens the files only when they are needed. + +16. POSIX does not specify how escape sequences other than \n and \D + (where D is the delimiter character) are to be treated. This is + reasonable, however, it also doesn't state that the backslash is + to be discarded from the output regardless. A strict reading of + POSIX would be that "echo xyz | sed s/./\a" would display "\ayz". + As historic sed implementations always discarded the backslash, + this implementation does as well. + +17. POSIX specifies that an address can be "empty". This implies + that constructs like ",d" or "1,d" and ",5d" are allowed. This + is not true for historic implementations or this implementation + of sed. + +18. The b t and : commands are documented in POSIX to ignore leading + white space, but no mention is made of trailing white space. + Historic implementations of sed assigned different locations to + the labels "x" and "x ". This is not useful, and leads to subtle + programming errors, but it is historic practice and changing it + could theoretically break working scripts. This implementation + follows historic practice. + +19. Although POSIX specifies that reading from files that do not exist + from within the script must not terminate the script, it does not + specify what happens if a write command fails. Historic practice + is to fail immediately if the file cannot be opened or written. + This implementation follows historic practice. + +20. Historic practice is that the \n construct can be used for either + string1 or string2 of the y command. This is not specified by + POSIX. This implementation follows historic practice. + +21. Deleted. + +22. 
Historic implementations of sed ignore the RE delimiter characters + within character classes. This is not specified in POSIX. This + implementation follows historic practice. + +23. Historic implementations handle empty RE's in a special way: the + empty RE is interpreted as if it were the last RE encountered, + whether in an address or elsewhere. POSIX does not document this + behavior. For example the command: + + sed -e /abc/s//XXX/ + + substitutes XXX for the pattern abc. The semantics of "the last + RE" can be defined in two different ways: + + 1. The last RE encountered when compiling (lexical/static scope). + 2. The last RE encountered while running (dynamic scope). + + While many historical implementations fail on programs depending + on scope differences, the SunOS version exhibited dynamic scope + behaviour. This implementation does dynamic scoping, as this seems + the most useful and in order to remain consistent with historical + practice.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/THIRDPARTYLICENSE Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,31 @@ +Copyright 2010 Nexenta Systems, Inc. All rights reserved. +Copyright (c) 1992 Diomidis Spinellis. +Copyright (c) 1992, 1993, 1994 + The Regents of the University of California. All rights reserved. + +This code is derived from software contributed to Berkeley by +Diomidis Spinellis of Imperial College, University of London. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/THIRDPARTYLICENSE.descrip Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,1 @@ +SED - STREAM EDITOR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/compile.c Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,934 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <libintl.h> +#include <note.h> + +#include "defs.h" +#include "extern.h" + +/* + * Label hash table: LHSZ buckets of singly linked labhash entries, indexed + * by (hash & LHMASK). Entries are added by enterlabel(), looked up by + * findlabel() and released by uselabel(). + */ +#define LHSZ 128 +#define LHMASK (LHSZ - 1) +static struct labhash { + struct labhash *lh_next; /* Next entry in the same bucket */ + uint_t lh_hash; /* Full hash of the label name */ + struct s_command *lh_cmd; /* The ':' command defining the label */ + int lh_ref; /* Set once findlabel() returns this label */ +} *labels[LHSZ]; + +static char *compile_addr(char *, struct s_addr *); +static char *compile_ccl(char **, char *); +static char *compile_delimited(char *, char *, int); +static char *compile_flags(char *, struct s_subst *); +static regex_t *compile_re(char *, int); +static char *compile_subst(char *, struct s_subst *); +static char *compile_text(void); +static char *compile_tr(char *, struct s_tr **); +static struct s_command + **compile_stream(struct s_command **); +static char *duptoeol(char *, const char *); +static void enterlabel(struct s_command *); +static struct s_command + *findlabel(char *); +static void fixuplabel(struct s_command *, struct s_command *); +static void uselabel(void); + +/* + * Command specification. This is used to drive the command parser. 
+ */ +struct s_format { + char code; /* Command code */ + int naddr; /* Maximum number of addresses accepted */ + enum e_args args; /* Argument type */ +}; + +static struct s_format cmd_fmts[] = { + {'{', 2, GROUP}, + {'}', 0, ENDGROUP}, + {'a', 1, TEXT}, + {'b', 2, BRANCH}, + {'c', 2, TEXT}, + {'d', 2, EMPTY}, + {'D', 2, EMPTY}, + {'g', 2, EMPTY}, + {'G', 2, EMPTY}, + {'h', 2, EMPTY}, + {'H', 2, EMPTY}, + {'i', 1, TEXT}, + {'l', 2, EMPTY}, + {'n', 2, EMPTY}, + {'N', 2, EMPTY}, + {'p', 2, EMPTY}, + {'P', 2, EMPTY}, + {'q', 1, EMPTY}, + {'r', 1, RFILE}, + {'s', 2, SUBST}, + {'t', 2, BRANCH}, + {'w', 2, WFILE}, + {'x', 2, EMPTY}, + {'y', 2, TR}, + {'!', 2, NONSEL}, + {':', 0, LABEL}, + {'#', 0, COMMENT}, + {'=', 1, EMPTY}, + {'\0', 0, COMMENT}, +}; + +/* The compiled program. */ +struct s_command *prog; + +/* + * Compile the program into prog, resolve the branch labels and warn about + * labels that are never used. + * Initialise appends (one slot per a and r command counted by fixuplabel) + * and the match array (maxnsub + 1 entries). + */ +void +compile(void) +{ + *compile_stream(&prog) = NULL; + fixuplabel(prog, NULL); + uselabel(); + if (appendnum == 0) + appends = NULL; + else if ((appends = malloc(sizeof (struct s_appends) * appendnum)) == + NULL) + err(1, "malloc"); + if ((match = malloc((maxnsub + 1) * sizeof (regmatch_t))) == NULL) + err(1, "malloc"); +} + +/* Advance p past any whitespace; does nothing when p is NULL. */ +#define EATSPACE() do { \ + if (p) \ + while (*p && isspace((unsigned char)*p)) \ + p++; \ + _NOTE(CONSTCOND) \ +} while (0) + +/* + * Parse script lines obtained from cu_fgets() into a chain of commands, + * storing the first one through link. Returns the address of the next + * pointer that ends the chain when the input is exhausted, so the caller + * can terminate the list. Syntax errors are reported through fatal(). 
+ */ +static struct s_command ** +compile_stream(struct s_command **link) +{ + char *p; + static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ + struct s_command *cmd, *cmd2, *stack; + struct s_format *fp; + char re[_POSIX2_LINE_MAX + 1]; + int naddr; /* Number of addresses */ + + stack = 0; + for (;;) { + if ((p = cu_fgets(lbuf, sizeof (lbuf), NULL)) == NULL) { + if (stack != 0) + fatal(_("unexpected EOF (pending }'s)")); + return (link); + } + +semicolon: EATSPACE(); + if (p) { + if (*p == '#' || *p == '\0') + continue; + else if (*p == ';') { + p++; + goto semicolon; + } + } + if ((*link = cmd = malloc(sizeof (struct s_command))) == NULL) + err(1, "malloc"); 
+ link = &cmd->next; + cmd->startline = cmd->nonsel = 0; + /* First parse the addresses */ + naddr = 0; + +/* Valid characters to start an address */ +#define addrchar(c) (strchr("0123456789/\\$", (c))) + if (addrchar(*p)) { + naddr++; + if ((cmd->a1 = malloc(sizeof (struct s_addr))) == NULL) + err(1, "malloc"); + p = compile_addr(p, cmd->a1); + EATSPACE(); /* EXTENSION */ + if (*p == ',') { + p++; + EATSPACE(); /* EXTENSION */ + naddr++; + if ((cmd->a2 = malloc(sizeof (struct s_addr))) + == NULL) + err(1, "malloc"); + p = compile_addr(p, cmd->a2); + EATSPACE(); + } else + cmd->a2 = 0; + } else + cmd->a1 = cmd->a2 = 0; + +nonsel: /* Now parse the command */ + if (!*p) + fatal(_("command expected")); + cmd->code = *p; + for (fp = cmd_fmts; fp->code; fp++) + if (fp->code == *p) + break; + if (!fp->code) + fatal(_("invalid command code %c"), *p); + if (naddr > fp->naddr) + fatal(_("command %c expects up to %d address(es), " + "found %d"), *p, fp->naddr, naddr); + switch (fp->args) { + case NONSEL: /* ! */ + p++; + EATSPACE(); + cmd->nonsel = 1; + goto nonsel; + case GROUP: /* { */ + p++; + EATSPACE(); + cmd->next = stack; + stack = cmd; + link = &cmd->u.c; + if (*p) + goto semicolon; + break; + case ENDGROUP: + /* + * Short-circuit command processing, since end of + * group is really just a noop. 
+ */ + cmd->nonsel = 1; + if (stack == 0) + fatal(_("unexpected }")); + cmd2 = stack; + stack = cmd2->next; + cmd2->next = cmd; + /*FALLTHROUGH*/ + case EMPTY: /* d D g G h H l n N p P q x = \0 */ + p++; + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + if (*p) + fatal(_("extra characters at the end of %c " + "command"), cmd->code); + break; + case TEXT: /* a c i */ + p++; + EATSPACE(); + if (*p != '\\') + fatal(_("command %c expects \\ " + "followed by text"), cmd->code); + p++; + EATSPACE(); + if (*p) + fatal(_("extra characters after \\ " + "at the end of %c command"), + cmd->code); + cmd->t = compile_text(); + break; + case COMMENT: /* \0 # */ + break; + case WFILE: /* w */ + p++; + EATSPACE(); + if (*p == '\0') + fatal(_("filename expected")); + cmd->t = duptoeol(p, "w command"); + if (aflag) + cmd->u.fd = -1; + else if ((cmd->u.fd = open(p, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1) + err(1, "%s", p); + break; + case RFILE: /* r */ + p++; + EATSPACE(); + if (*p == '\0') + fatal(_("filename expected")); + else + cmd->t = duptoeol(p, "read command"); + break; + case BRANCH: /* b t */ + p++; + EATSPACE(); + if (*p == '\0') + cmd->t = NULL; + else + cmd->t = duptoeol(p, "branch"); + break; + case LABEL: /* : */ + p++; + EATSPACE(); + cmd->t = duptoeol(p, "label"); + if (strlen(p) == 0) + fatal(_("empty label")); + enterlabel(cmd); + break; + case SUBST: /* s */ + p++; + if (*p == '\0' || *p == '\\') + fatal(_("substitute pattern can not " + "be delimited by newline or backslash")); + if ((cmd->u.s = calloc(1, sizeof (struct s_subst))) == + NULL) + err(1, "malloc"); + p = compile_delimited(p, re, 0); + if (p == NULL) + fatal(_("unterminated substitute pattern")); + + /* + * Compile RE with no case sensitivity temporarily, so + * that compile_subst() can check back-references + * against re_nsub before the flags are known. + * NOTE(review): the regex_t compiled here is replaced + * below without regfree()/free() -- looks like a small + * leak per s command; confirm. + */ + if (*re == '\0') + cmd->u.s->re = NULL; + else + cmd->u.s->re = compile_re(re, 0); + --p; + p = compile_subst(p, cmd->u.s); + p = compile_flags(p, cmd->u.s); + + /* Recompile RE with case sens. 
from "I" flag if any */ + if (*re == '\0') + cmd->u.s->re = NULL; + else + cmd->u.s->re = compile_re(re, cmd->u.s->icase); + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + break; + case TR: /* y */ + p++; + p = compile_tr(p, &cmd->u.y); + EATSPACE(); + if (*p == ';') { + p++; + link = &cmd->next; + goto semicolon; + } + if (*p) + fatal(_("extra text at the end of a " + "transform command")); + break; + } + } +} + +/* + * Get a delimited string. P points to the delimiter of the string; d points + * to a buffer area. Newline and delimiter escapes are processed; other + * escapes are ignored. When is_tr is nonzero an escaped backslash is + * reduced to a single backslash; otherwise both characters are kept. + * + * Returns a pointer to the first character after the final delimiter or NULL + * in the case of a non-terminated string. The character array d is filled + * with the processed string. + */ +static char * +compile_delimited(char *p, char *d, int is_tr) +{ + char c; + + c = *p++; + if (c == '\0') + return (NULL); + else if (c == '\\') + fatal(_("\\ can not be used as a string delimiter")); + else if (c == '\n') + fatal(_("newline can not be used as a string delimiter")); + while (*p) { + if (*p == '[' && *p != c) { + if ((d = compile_ccl(&p, d)) == NULL) + fatal(_("unbalanced brackets ([])")); + continue; + } else if (*p == '\\' && p[1] == '[') { + *d++ = *p++; + } else if (*p == '\\' && p[1] == c) + p++; + else if (*p == '\\' && p[1] == 'n') { + *d++ = '\n'; + p += 2; + continue; + } else if (*p == '\\' && p[1] == '\\') { + if (is_tr) + p++; + else + *d++ = *p++; + } else if (*p == c) { + *d = '\0'; + return (p + 1); + } + *d++ = *p++; + } + return (NULL); +} + + +/* + * compile_ccl: copy one bracket expression from *sp to t verbatim. The + * [. .], [: :] and [= =] elements are skipped as a unit so that a ']' inside + * them does not end the expression. On success *sp is advanced past the + * closing ']' and the position in t after the copied text is returned; + * NULL is returned if the expression is not terminated. + */ +static char * +compile_ccl(char **sp, char *t) +{ + int c, d; + char *s = *sp; + + *t++ = *s++; + if (*s == '^') + *t++ = *s++; + if (*s == ']') + *t++ = *s++; + for (; *s && (*t = *s) != ']'; s++, t++) + if (*s == '[' && + ((d = *(s+1)) == '.' 
|| d == ':' || d == '=')) { + *++t = *++s, t++, s++; + for (c = *s; (*t = *s) != ']' || c != d; s++, t++) + if ((c = *s) == '\0') + return (NULL); + } + return ((*s == ']') ? *sp = ++s, ++t : NULL); +} + +/* + * Compiles the regular expression in RE and returns a pointer to the compiled + * regular expression. + * The global rflags are passed to regcomp, with REG_ICASE added when + * case_insensitive is nonzero. maxnsub is raised to the largest re_nsub + * seen so far. A compilation error is reported through fatal(). + */ +static regex_t * +compile_re(char *re, int case_insensitive) +{ + regex_t *rep; + int eval, flags; + + + flags = rflags; + if (case_insensitive) + flags |= REG_ICASE; + if ((rep = malloc(sizeof (regex_t))) == NULL) + err(1, "malloc"); + if ((eval = regcomp(rep, re, flags)) != 0) + fatal(_("RE error: %s"), strregerror(eval, rep)); + if (maxnsub < rep->re_nsub) + maxnsub = rep->re_nsub; + return (rep); +} + +/* + * Compile the replacement string of a substitute command and set s->new to + * point to a saved copy of it. P points to the delimiter that precedes the + * replacement. Back-references are checked against s->re->re_nsub when + * s->re is set, and the largest one seen is recorded in s->maxbref. + * Continuation lines are read with cu_fgets(). Returns a pointer to the + * text that follows the closing delimiter. + */ +static char * +compile_subst(char *p, struct s_subst *s) +{ + static char lbuf[_POSIX2_LINE_MAX + 1]; + int asize; + uintptr_t size; + uchar_t ref; + char c, *text, *op, *sp; + int more = 1, sawesc = 0; + + c = *p++; /* Terminator character */ + if (c == '\0') + return (NULL); + + s->maxbref = 0; + s->linenum = linenum; + asize = 2 * _POSIX2_LINE_MAX + 1; + if ((text = malloc(asize)) == NULL) + err(1, "malloc"); + size = 0; + do { + op = sp = text + size; + for (; *p; p++) { + if (*p == '\\' || sawesc) { + /* + * If this is a continuation from the last + * buffer, we won't have a character to + * skip over. + */ + if (sawesc) + sawesc = 0; + else + p++; + + if (*p == '\0') { + /* + * This escaped character is continued + * in the next part of the line. Note + * this fact, then cause the loop to + * exit w/ normal EOL case and reenter + * above with the new buffer. 
+ */ + sawesc = 1; + p--; + continue; + } else if (strchr("123456789", *p) != NULL) { + *sp++ = '\\'; + ref = *p - '0'; + if (s->re != NULL && + ref > s->re->re_nsub) + fatal(_("not defined in " + "the RE: \\%c"), *p); + if (s->maxbref < ref) + s->maxbref = ref; + } else if (*p == '&' || *p == '\\') + *sp++ = '\\'; + } else if (*p == c) { + if (*++p == '\0' && more) { + if (cu_fgets(lbuf, sizeof (lbuf), + &more)) + p = lbuf; + } + *sp++ = '\0'; + size += (uintptr_t)sp - (uintptr_t)op; + if ((s->new = realloc(text, size)) == NULL) + err(1, "realloc"); + return (p); + } else if (*p == '\n') { + fatal(_("unescaped newline inside " + "substitute pattern")); + /* NOTREACHED */ + } + *sp++ = *p; + } + size += (uintptr_t)sp - (uintptr_t)op; + if (asize - size < _POSIX2_LINE_MAX + 1) { + asize *= 2; + if ((text = realloc(text, asize)) == NULL) + err(1, "realloc"); + } + } while (cu_fgets(p = lbuf, sizeof (lbuf), &more)); + fatal(_("unterminated substitute in regular expression")); + return (NULL); +} + +/* + * Compile the flags of the s command (g, p, I, an occurrence number and + * w file) into s. Returns a pointer to the NUL, newline or ';' that ends + * the flags, or to the end of the file name after a w flag. + */ +static char * +compile_flags(char *p, struct s_subst *s) +{ + int gn; /* True if we have seen g or n */ + unsigned long nval; + char wfile[_POSIX2_LINE_MAX + 1], *q; + + s->n = 1; /* Default */ + s->p = 0; + s->wfile = NULL; + s->wfd = -1; + s->icase = 0; + gn = 0; + for (;;) { + EATSPACE(); /* EXTENSION */ + switch (*p) { + case 'g': + if (gn) + fatal(_("more than one number or " + "'g' in substitute flags")); + gn = 1; + s->n = 0; + break; + case '\0': + case '\n': + case ';': + return (p); + case 'p': + s->p = 1; + break; + case 'I': + s->icase = 1; + break; + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + if (gn) + fatal(_("more than one number or " + "'g' in substitute flags")); + gn = 1; + errno = 0; + nval = strtol(p, &p, 10); + if (errno == ERANGE || nval > INT_MAX) + fatal(_("overflow in the 'N' substitute flag")); + s->n = nval; + p--; + break; + case 'w': + p++; 
+#ifdef HISTORIC_PRACTICE + if (*p != ' ') { + fatal(_("space missing before w wfile")); + return (p); + } +#endif + EATSPACE(); + q = wfile; + while (*p) { + if (*p == '\n') + break; + *q++ = *p++; + } + *q = '\0'; + if (q == wfile) + fatal(_("no wfile specified")); + s->wfile = strdup(wfile); + if (!aflag && (s->wfd = open(wfile, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1) + err(1, "%s", wfile); + return (p); + default: + fatal(_("bad flag in substitute command: '%c'"), *p); + break; + } + p++; + } +} + +/* + * Compile a translation set of strings into a lookup table. + */ +static char * +compile_tr(char *p, struct s_tr **py) +{ + struct s_tr *y; + int i; + const char *op, *np; + char old[_POSIX2_LINE_MAX + 1]; + char new[_POSIX2_LINE_MAX + 1]; + size_t oclen, oldlen, nclen, newlen; + mbstate_t mbs1, mbs2; + + if ((*py = y = malloc(sizeof (*y))) == NULL) + err(1, NULL); + y->multis = NULL; + y->nmultis = 0; + + if (*p == '\0' || *p == '\\') + fatal(_("transform pattern can not be delimited by " + "newline or backslash")); + p = compile_delimited(p, old, 1); + if (p == NULL) + fatal(_("unterminated transform source string")); + p = compile_delimited(p - 1, new, 1); + if (p == NULL) + fatal(_("unterminated transform target string")); + EATSPACE(); + op = old; + oldlen = mbsrtowcs(NULL, &op, 0, NULL); + if (oldlen == (size_t)-1) + err(1, NULL); + np = new; + newlen = mbsrtowcs(NULL, &np, 0, NULL); + if (newlen == (size_t)-1) + err(1, NULL); + if (newlen != oldlen) + fatal(_("transform strings are not the same length")); + if (MB_CUR_MAX == 1) { + /* + * The single-byte encoding case is easy: generate a + * lookup table. + */ + for (i = 0; i <= UCHAR_MAX; i++) + y->bytetab[i] = (char)i; + for (; *op; op++, np++) + y->bytetab[(uchar_t)*op] = *np; + } else { + /* + * Multi-byte encoding case: generate a lookup table as + * above, but only for single-byte characters. 
The first + * bytes of multi-byte characters have their lookup table + * entries set to 0, which causes do_tr() to search through + * an auxiliary vector of multi-byte mappings. + */ + (void) memset(&mbs1, 0, sizeof (mbs1)); + (void) memset(&mbs2, 0, sizeof (mbs2)); + for (i = 0; i <= UCHAR_MAX; i++) + y->bytetab[i] = (btowc(i) != WEOF) ? (uchar_t)i : 0; + while (*op != '\0') { + oclen = mbrlen(op, MB_LEN_MAX, &mbs1); + if (oclen == (size_t)-1 || oclen == (size_t)-2) + errx(1, "%s", strerror(EILSEQ)); + nclen = mbrlen(np, MB_LEN_MAX, &mbs2); + if (nclen == (size_t)-1 || nclen == (size_t)-2) + errx(1, "%s", strerror(EILSEQ)); + if (oclen == 1 && nclen == 1) + y->bytetab[(uchar_t)*op] = *np; + else { + y->bytetab[(uchar_t)*op] = 0; + y->multis = realloc(y->multis, + (y->nmultis + 1) * sizeof (*y->multis)); + if (y->multis == NULL) + err(1, NULL); + i = y->nmultis++; + y->multis[i].fromlen = oclen; + (void) memcpy(y->multis[i].from, op, oclen); + y->multis[i].tolen = nclen; + (void) memcpy(y->multis[i].to, np, nclen); + } + op += oclen; + np += nclen; + } + } + return (p); +} + +/* + * Compile the text following an a, c or i command. Lines are read with + * cu_fgets() for as long as they end in an escaped newline; the result is + * returned in a buffer obtained from realloc. 
+ */ +static char * +compile_text(void) +{ + int esc_nl; + uintptr_t size, asize; + char *text, *p, *op, *s; + char lbuf[_POSIX2_LINE_MAX + 1]; + + asize = 2 * _POSIX2_LINE_MAX + 1; + if ((text = malloc(asize)) == NULL) + err(1, "malloc"); + size = 0; + while (cu_fgets(lbuf, sizeof (lbuf), NULL)) { + op = s = text + size; + p = lbuf; + EATSPACE(); + for (esc_nl = 0; *p != '\0'; p++) { + if (*p == '\\' && p[1] != '\0' && *++p == '\n') + esc_nl = 1; + *s++ = *p; + } + size += (uintptr_t)s - (uintptr_t)op; + if (!esc_nl) { + *s = '\0'; + break; + } + if (asize - size < _POSIX2_LINE_MAX + 1) { + asize *= 2; + if ((text = realloc(text, asize)) == NULL) + err(1, "realloc"); + } + } + text[size] = '\0'; + if ((p = realloc(text, size + 1)) == NULL) + err(1, "realloc"); + return (p); +} + +/* + * Get an address and return a pointer to the first character after + * it. Fill the structure pointed to according to the address. + */ +static char * +compile_addr(char *p, struct s_addr *a) +{ + char *end, re[_POSIX2_LINE_MAX + 1]; + int icase; + + icase = 0; + + a->type = 0; + switch (*p) { + case '\\': /* Context address */ + ++p; + /* FALLTHROUGH */ + case '/': /* Context address */ + p = compile_delimited(p, re, 0); + if (p == NULL) + fatal(_("unterminated regular expression")); + + /* Check for case insensitive regexp flag */ + if (*p == 'I') { + icase = 1; + p++; + } + if (*re == '\0') + a->u.r = NULL; + else + a->u.r = compile_re(re, icase); + a->type = AT_RE; + return (p); + + case '$': /* Last line */ + a->type = AT_LAST; + return (p + 1); + + case '+': /* Relative line number */ + a->type = AT_RELLINE; + p++; + /* FALLTHROUGH */ + /* Line number */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (a->type == 0) + a->type = AT_LINE; + a->u.l = strtol(p, &end, 10); + return (end); + default: + fatal(_("expected context address")); + return (NULL); + } +} + +/* + * duptoeol -- + * Return a malloc'd copy of all the characters 
up to \n or \0. The terminator + * in s is overwritten with \0. A warning naming ctype is issued when + * the copied text ends in whitespace. + */ +static char * +duptoeol(char *s, const char *ctype) +{ + size_t len; + int ws; + char *p, *start; + + ws = 0; + for (start = s; *s != '\0' && *s != '\n'; ++s) + ws = isspace((unsigned char)*s); + *s = '\0'; + if (ws) + warnx(_("%lu: %s: whitespace after %s"), linenum, fname, ctype); + len = (uintptr_t)s - (uintptr_t)start + 1; + if ((p = malloc(len)) == NULL) + err(1, "malloc"); + return (memmove(p, start, len)); +} + +/* + * Convert goto label names to addresses, and count a and r commands, in + * the given subset of the script. Free the memory used by labels in b + * and t commands (but not by :). + * + * TODO: Remove } nodes + */ +static void +fixuplabel(struct s_command *cp, struct s_command *end) +{ + + for (; cp != end; cp = cp->next) + switch (cp->code) { + case 'a': + case 'r': + appendnum++; + break; + case 'b': + case 't': + /* Resolve branch target. */ + if (cp->t == NULL) { + cp->u.c = NULL; + break; + } + if ((cp->u.c = findlabel(cp->t)) == NULL) + fatal(_("undefined label '%s'"), cp->t); + free(cp->t); + break; + case '{': + /* Do interior commands. */ + fixuplabel(cp->u.c, cp->next); + break; + } +} + +/* + * Associate the given command label for later lookup. + */ +static void +enterlabel(struct s_command *cp) +{ + struct labhash **lhp, *lh; + uchar_t *p; + uint_t h, c; + + for (h = 0, p = (uchar_t *)cp->t; (c = *p) != 0; p++) + h = (h << 5) + h + c; + lhp = &labels[h & LHMASK]; + for (lh = *lhp; lh != NULL; lh = lh->lh_next) + if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) + fatal(_("duplicate label '%s'"), cp->t); + if ((lh = malloc(sizeof (*lh))) == NULL) + err(1, "malloc"); + lh->lh_next = *lhp; + lh->lh_hash = h; + lh->lh_cmd = cp; + lh->lh_ref = 0; + *lhp = lh; +} + +/* + * Look up the label called name in the label hash table. On success mark + * it as referenced and return the command that defines it. Return NULL if + * not found. 
+ */ +static struct s_command * +findlabel(char *name) +{ + struct labhash *lh; + uchar_t *p; + uint_t h, c; + + for (h = 0, p = (uchar_t *)name; (c = *p) != 0; p++) + h = (h << 5) + h + c; + for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { + if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { + lh->lh_ref = 1; + return (lh->lh_cmd); + } + } + return (NULL); +} + +/* + * Warn about any unused labels. As a side effect, release the label hash + * table space. + */ +static void +uselabel(void) +{ + struct labhash *lh, *next; + int i; + + for (i = 0; i < LHSZ; i++) { + for (lh = labels[i]; lh != NULL; lh = next) { + next = lh->lh_next; + if (!lh->lh_ref) + warnx(_("%lu: %s: unused label '%s'"), + linenum, fname, lh->lh_cmd->t); + free(lh); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/defs.h Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,150 @@ +/* + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef DEFS_H +#define DEFS_H + +/* + * Types of address specifications + */ +enum e_atype { + AT_RE = 1, /* Line that match RE */ + AT_LINE, /* Specific line */ + AT_RELLINE, /* Relative line */ + AT_LAST /* Last line */ +}; + +/* + * Format of an address + */ +struct s_addr { + enum e_atype type; /* Address type */ + union { + ulong_t l; /* Line number */ + regex_t *r; /* Regular expression */ + } u; +}; + +/* + * Substitution command + */ +struct s_subst { + int n; /* Occurrence to subst. */ + int p; /* True if p flag */ + int icase; /* True if I flag */ + char *wfile; /* NULL if no wfile */ + int wfd; /* Cached file descriptor */ + regex_t *re; /* Regular expression */ + unsigned int maxbref; /* Largest backreference. */ + ulong_t linenum; /* Line number. */ + char *new; /* Replacement text */ +}; + +/* + * Translate command. + */ +struct s_tr { + unsigned char bytetab[256]; + struct trmulti { + size_t fromlen; + char from[MB_LEN_MAX]; + size_t tolen; + char to[MB_LEN_MAX]; + } *multis; + int nmultis; +}; + +/* + * An internally compiled command. + * Initialy, label references are stored in t, on a second pass they + * are updated to pointers. + */ +struct s_command { + struct s_command *next; /* Pointer to next command */ + struct s_addr *a1, *a2; /* Start and end address */ + ulong_t startline; /* Start line number or zero */ + char *t; /* Text for : a c i r w */ + union { + struct s_command *c; /* Command(s) for b t { */ + struct s_subst *s; /* Substitute command */ + struct s_tr *y; /* Replace command array */ + int fd; /* File descriptor for w */ + } u; + char code; /* Command code */ + uint_t nonsel:1; /* True if ! */ +}; + +/* + * Types of command arguments recognised by the parser + */ +enum e_args { + EMPTY, /* d D g G h H l n N p P q x = \0 */ + TEXT, /* a c i */ + NONSEL, /* ! 
*/ + GROUP, /* { */ + ENDGROUP, /* } */ + COMMENT, /* # */ + BRANCH, /* b t */ + LABEL, /* : */ + RFILE, /* r */ + WFILE, /* w */ + SUBST, /* s */ + TR /* y */ +}; + +/* + * Structure containing things to append before a line is read + */ +struct s_appends { + enum {AP_STRING, AP_FILE} type; + char *s; + size_t len; +}; + +enum e_spflag { + APPEND, /* Append to the contents. */ + REPLACE /* Replace the contents. */ +}; + +/* + * Structure for a space (process, hold, otherwise). + */ +typedef struct { + char *space; /* Current space pointer. */ + size_t len; /* Current length. */ + int deleted; /* If deleted. */ + char *back; /* Backing memory. */ + size_t blen; /* Backing memory length. */ +} SPACE; + +#endif /* DEFS_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/extern.h Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,67 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef EXTERN_H +#define EXTERN_H + +extern struct s_command *prog; +extern struct s_appends *appends; +extern regmatch_t *match; +extern size_t maxnsub; +extern ulong_t linenum; +extern int appendnum; +extern int aflag, eflag, nflag; +extern const char *fname, *outfname; +extern FILE *infile, *outfile; +extern int rflags; /* regex flags to use */ + +void cfclose(struct s_command *, struct s_command *); +void compile(void); +void cspace(SPACE *, const char *, size_t, enum e_spflag); +char *cu_fgets(char *, int, int *); +int mf_fgets(SPACE *, enum e_spflag); +int lastline(void); +void process(void); +void resetstate(void); +char *strregerror(int, regex_t *); +/*PRINTFLIKE1*/ +void fatal(const char *, ...); /* output includes file and line # */ + +#ifdef lint +#define _(s) s +#else +#define _(s) gettext(s) +#endif + +#endif /* EXTERN_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/main.c Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,520 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/stat.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <locale.h> +#include <regex.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libintl.h> + +#include "defs.h" +#include "extern.h" + +/* + * Linked list of units (strings and files) to be compiled + */ +struct s_compunit { + struct s_compunit *next; + enum e_cut {CU_FILE, CU_STRING} type; + char *s; /* Pointer to string or fname */ +}; + +/* + * Linked list pointer to compilation units and pointer to current + * next pointer. + */ +static struct s_compunit *script, **cu_nextp = &script; + +/* + * Linked list of files to be processed + */ +struct s_flist { + char *fname; + struct s_flist *next; +}; + +/* + * Linked list pointer to files and pointer to current + * next pointer. + */ +static struct s_flist *files, **fl_nextp = &files; + +FILE *infile; /* Current input file */ +FILE *outfile; /* Current output file */ + +int aflag, eflag, nflag; +int rflags = 0; +static int rval; /* Exit status */ + +static int ispan; /* Whether inplace editing spans across files */ + +/* + * Current file and line number; line numbers restart across compilation + * units, but span across input files. The latter is optional if editing + * in place. + */ +const char *fname; /* File name. */ +const char *outfname; /* Output file name */ +static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */ +static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */ +static const char *inplace; /* Inplace edit file extension. 
*/ +ulong_t linenum; + +static void add_compunit(enum e_cut, char *); +static void add_file(char *); +static void usage(void); +static char *getln(FILE *, size_t *); + + +int +main(int argc, char *argv[]) +{ + int c, fflag; + char *temp_arg; + + (void) setlocale(LC_ALL, ""); + +#ifndef TEXT_DOMAIN +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + fflag = 0; + inplace = NULL; + + while ((c = getopt(argc, argv, "EI:ae:f:i:lnr")) != -1) + switch (c) { + case 'r': /* Gnu sed compat */ + case 'E': + rflags = REG_EXTENDED; + break; + case 'I': + inplace = optarg; + ispan = 1; /* span across input files */ + break; + case 'a': + aflag = 1; + break; + case 'e': + eflag = 1; + if (asprintf(&temp_arg, "%s\n", optarg) <= 1) + err(1, "asprintf"); + add_compunit(CU_STRING, temp_arg); + break; + case 'f': + fflag = 1; + add_compunit(CU_FILE, optarg); + break; + case 'i': + inplace = optarg; + ispan = 0; /* don't span across input files */ + break; + case 'l': + /* On SunOS, setlinebuf "returns no useful value */ + (void) setlinebuf(stdout); + break; + case 'n': + nflag = 1; + break; + default: + case '?': + usage(); + } + argc -= optind; + argv += optind; + + /* First usage case; script is the first arg */ + if (!eflag && !fflag && *argv) { + add_compunit(CU_STRING, *argv); + argv++; + } + + compile(); + + /* Continue with first and start second usage */ + if (*argv) + for (; *argv; argv++) + add_file(*argv); + else + add_file(NULL); + process(); + cfclose(prog, NULL); + if (fclose(stdout)) + err(1, "stdout"); + return (rval); +} + +static void +usage(void) +{ + (void) fputs(_("usage: sed script [-Ealn] [-i extension] [file ...]\n" + " sed [-Ealn] [-i extension] [-e script] ... " + "[-f script_file] ... [file ...]"), + stderr); + exit(1); +} + +/* + * Like fgets, but go through the chain of compilation units chaining them + * together. Empty strings and files are ignored. 
+ */ +char * +cu_fgets(char *buf, int n, int *more) +{ + static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; + static FILE *f; /* Current open file */ + static char *s; /* Current pointer inside string */ + static char string_ident[30]; + char *p; + +again: + switch (state) { + case ST_EOF: + if (script == NULL) { + if (more != NULL) + *more = 0; + return (NULL); + } + linenum = 0; + switch (script->type) { + case CU_FILE: + if ((f = fopen(script->s, "r")) == NULL) + err(1, "%s", script->s); + fname = script->s; + state = ST_FILE; + goto again; + case CU_STRING: + if (((size_t)snprintf(string_ident, + sizeof (string_ident), "\"%s\"", script->s)) >= + sizeof (string_ident) - 1) + (void) strcpy(string_ident + + sizeof (string_ident) - 6, " ...\""); + fname = string_ident; + s = script->s; + state = ST_STRING; + goto again; + } + /*NOTREACHED*/ + + case ST_FILE: + if ((p = fgets(buf, n, f)) != NULL) { + linenum++; + if (linenum == 1 && buf[0] == '#' && buf[1] == 'n') + nflag = 1; + if (more != NULL) + *more = !feof(f); + return (p); + } + script = script->next; + (void) fclose(f); + state = ST_EOF; + goto again; + case ST_STRING: + if (linenum == 0 && s[0] == '#' && s[1] == 'n') + nflag = 1; + p = buf; + for (;;) { + if (n-- <= 1) { + *p = '\0'; + linenum++; + if (more != NULL) + *more = 1; + return (buf); + } + switch (*s) { + case '\0': + state = ST_EOF; + if (s == script->s) { + script = script->next; + goto again; + } else { + script = script->next; + *p = '\0'; + linenum++; + if (more != NULL) + *more = 0; + return (buf); + } + case '\n': + *p++ = '\n'; + *p = '\0'; + s++; + linenum++; + if (more != NULL) + *more = 0; + return (buf); + default: + *p++ = *s++; + } + } + } + /* NOTREACHED */ + return (NULL); +} + +/* + * Like fgets, but go through the list of files chaining them together. + * Set len to the length of the line. 
+ */ +int +mf_fgets(SPACE *sp, enum e_spflag spflag) +{ + struct stat sb; + size_t len; + char *p; + int c; + static int firstfile; + + if (infile == NULL) { + /* stdin? */ + if (files->fname == NULL) { + if (inplace != NULL) + errx(1, + _("-I or -i may not be used with stdin")); + infile = stdin; + fname = "stdin"; + outfile = stdout; + outfname = "stdout"; + } + firstfile = 1; + } + + for (;;) { + if (infile != NULL && (c = getc(infile)) != EOF) { + (void) ungetc(c, infile); + break; + } + /* If we are here then either eof or no files are open yet */ + if (infile == stdin) { + sp->len = 0; + return (0); + } + if (infile != NULL) { + (void) fclose(infile); + if (*oldfname != '\0') { + if (link(fname, oldfname) != 0) { + warn("link()"); + (void) unlink(tmpfname); + exit(1); + } + *oldfname = '\0'; + } + if (*tmpfname != '\0') { + if (outfile != NULL && outfile != stdout) + if (fclose(outfile) != 0) { + warn("fclose()"); + (void) unlink(tmpfname); + exit(1); + } + outfile = NULL; + if (rename(tmpfname, fname) != 0) { + /* this should not happen really! 
*/ + warn("rename()"); + (void) unlink(tmpfname); + exit(1); + } + *tmpfname = '\0'; + } + outfname = NULL; + } + if (firstfile == 0) + files = files->next; + else + firstfile = 0; + if (files == NULL) { + sp->len = 0; + return (0); + } + fname = files->fname; + if (inplace != NULL) { + char bn[PATH_MAX]; + char dn[PATH_MAX]; + (void) strlcpy(bn, fname, sizeof (bn)); + (void) strlcpy(dn, fname, sizeof (dn)); + if (lstat(fname, &sb) != 0) + err(1, "%s", fname); + if (!(sb.st_mode & S_IFREG)) + fatal(_("in-place editing only " + "works for regular files")); + if (*inplace != '\0') { + (void) strlcpy(oldfname, fname, + sizeof (oldfname)); + len = strlcat(oldfname, inplace, + sizeof (oldfname)); + if (len > sizeof (oldfname)) + fatal(_("name too long")); + } + len = snprintf(tmpfname, sizeof (tmpfname), + "%s/.!%ld!%s", dirname(dn), (long)getpid(), + basename(bn)); + if (len >= sizeof (tmpfname)) + fatal(_("name too long")); + (void) unlink(tmpfname); + if ((outfile = fopen(tmpfname, "w")) == NULL) + err(1, "%s", fname); + if (fchown(fileno(outfile), sb.st_uid, sb.st_gid) != 0) + warn("fchown()"); + if (fchmod(fileno(outfile), sb.st_mode & 07777) != 0) + warn("fchmod()"); + outfname = tmpfname; + if (!ispan) { + linenum = 0; + resetstate(); + } + } else { + outfile = stdout; + outfname = "stdout"; + } + if ((infile = fopen(fname, "r")) == NULL) { + warn("%s", fname); + rval = 1; + continue; + } + } + /* + * We are here only when infile is open and we still have something + * to read from it. + * + * Use fgetln so that we can handle essentially infinite input data. + * Can't use the pointer into the stdio buffer as the process space + * because the ungetc() can cause it to move. + */ + p = getln(infile, &len); + if (ferror(infile)) + errx(1, "%s: %s", fname, strerror(errno ? 
errno : EIO)); + if (len != 0 && p[len - 1] == '\n') + len--; + cspace(sp, p, len, spflag); + + linenum++; + + return (1); +} + +/* + * Add a compilation unit to the linked list + */ +static void +add_compunit(enum e_cut type, char *s) +{ + struct s_compunit *cu; + + if ((cu = malloc(sizeof (struct s_compunit))) == NULL) + err(1, "malloc"); + cu->type = type; + cu->s = s; + cu->next = NULL; + *cu_nextp = cu; + cu_nextp = &cu->next; +} + +/* + * Add a file to the linked list + */ +static void +add_file(char *s) +{ + struct s_flist *fp; + + if ((fp = malloc(sizeof (struct s_flist))) == NULL) + err(1, "malloc"); + fp->next = NULL; + *fl_nextp = fp; + fp->fname = s; + fl_nextp = &fp->next; +} + +int +lastline(void) +{ + int ch; + + if (files->next != NULL && (inplace == NULL || ispan)) + return (0); + if ((ch = getc(infile)) == EOF) + return (1); + (void) ungetc(ch, infile); + return (0); +} + +char * +getln(FILE *in, size_t *lenp) +{ + static char *buffer = NULL; + static size_t sz = 0; + + size_t len = 0; + + for (;;) { + if (sz <= (len + 1)) { + char *nb; + if ((nb = realloc(buffer, sz + LINE_MAX)) == NULL) { + err(1, "realloc"); + } + buffer = nb; + sz += LINE_MAX; + } + + buffer[len] = 0; + + if (fgets(buffer + len, sz - len, in) == NULL) { + /* END OF FILE */ + *lenp = len; + break; + } + + len += strlen(buffer + len); + + if (buffer[len - 1] == '\n') { + /* got the new line */ + *lenp = len; + break; + } + } + + return (buffer); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/misc.c Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,82 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> + +#include <err.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include "defs.h" +#include "extern.h" + +/* + * Return a string for a regular expression error passed. This is overkill, + * because of the silly semantics of regerror (we can never know the size of + * the buffer). + */ +char * +strregerror(int errcode, regex_t *preg) +{ + static char *oe; + size_t s; + + if (oe != NULL) + free(oe); + s = regerror(errcode, preg, NULL, 0); + if ((oe = malloc(s)) == NULL) + err(1, "malloc"); + (void) regerror(errcode, preg, oe, s); + return (oe); +} + +void +fatal(const char *fmt, ...) +{ + va_list ap; + + (void) fprintf(stderr, "%s: %lu: ", fname, linenum); + + va_start(ap, fmt); + (void) vfprintf(stderr, fmt, ap); + va_end(ap); + + (void) fputc('\n', stderr); + + exit(1); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/process.c Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,767 @@ +/* + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 1992 Diomidis Spinellis. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Diomidis Spinellis of Imperial College, University of London. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> +#include <wctype.h> +#include <termio.h> +#include <libintl.h> +#include <note.h> + +#include "defs.h" +#include "extern.h" + +static SPACE HS, PS, SS, YS; +#define pd PS.deleted +#define ps PS.space +#define psl PS.len +#define hs HS.space +#define hsl HS.len + +static int applies(struct s_command *); +static void do_tr(struct s_tr *); +static void flush_appends(void); +static void lputs(char *, size_t); +static int regexec_e(regex_t *, const char *, int, int, size_t); +static void regsub(SPACE *, char *, char *); +static int substitute(struct s_command *); + +struct s_appends *appends; /* Array of pointers to strings to append. */ +static int appendx; /* Index into appends array. */ +int appendnum; /* Size of appends array. */ + +static int lastaddr; /* Set by applies if last address of a range. */ +static int sdone; /* If any substitutes since last line input. */ + /* Iov structure for 'w' commands. 
*/ +static regex_t *defpreg; +size_t maxnsub; +regmatch_t *match; + +#define OUT() do { \ + (void) fwrite(ps, 1, psl, outfile); \ + (void) fputc('\n', outfile); \ + _NOTE(CONSTCOND) \ +} while (0) + +void +process(void) +{ + struct s_command *cp; + SPACE tspace; + size_t oldpsl = 0; + char *p; + + p = NULL; + + for (linenum = 0; mf_fgets(&PS, REPLACE); /* NOP */) { + pd = 0; +top: + cp = prog; +redirect: + while (cp != NULL) { + if (!applies(cp)) { + cp = cp->next; + continue; + } + switch (cp->code) { + case '{': + cp = cp->u.c; + goto redirect; + case 'a': + if (appendx >= appendnum) + if ((appends = realloc(appends, + sizeof (struct s_appends) * + (appendnum *= 2))) == NULL) + err(1, "realloc"); + appends[appendx].type = AP_STRING; + appends[appendx].s = cp->t; + appends[appendx].len = strlen(cp->t); + appendx++; + break; + case 'b': + cp = cp->u.c; + goto redirect; + case 'c': + pd = 1; + psl = 0; + if (cp->a2 == NULL || lastaddr || lastline()) + (void) fprintf(outfile, "%s", cp->t); + break; + case 'd': + pd = 1; + goto new; + case 'D': + if (pd) + goto new; + if (psl == 0 || + (p = memchr(ps, '\n', psl)) == NULL) { + pd = 1; + goto new; + } else { + psl -= + (uintptr_t)(p + 1) - (uintptr_t)ps; + (void) memmove(ps, p + 1, psl); + goto top; + } + case 'g': + cspace(&PS, hs, hsl, REPLACE); + break; + case 'G': + cspace(&PS, "\n", 1, APPEND); + cspace(&PS, hs, hsl, APPEND); + break; + case 'h': + cspace(&HS, ps, psl, REPLACE); + break; + case 'H': + cspace(&HS, "\n", 1, APPEND); + cspace(&HS, ps, psl, APPEND); + break; + case 'i': + (void) fprintf(outfile, "%s", cp->t); + break; + case 'l': + lputs(ps, psl); + break; + case 'n': + if (!nflag && !pd) + OUT(); + flush_appends(); + if (!mf_fgets(&PS, REPLACE)) + exit(0); + pd = 0; + break; + case 'N': + flush_appends(); + cspace(&PS, "\n", 1, APPEND); + if (!mf_fgets(&PS, APPEND)) + exit(0); + break; + case 'p': + if (pd) + break; + OUT(); + break; + case 'P': + if (pd) + break; + if ((p = memchr(ps, '\n', psl)) != 
NULL) { + oldpsl = psl; + psl = (uintptr_t)p - (uintptr_t)ps; + } + OUT(); + if (p != NULL) + psl = oldpsl; + break; + case 'q': + if (!nflag && !pd) + OUT(); + flush_appends(); + exit(0); + /*NOTREACHED*/ + case 'r': + if (appendx >= appendnum) + if ((appends = realloc(appends, + sizeof (struct s_appends) * + (appendnum *= 2))) == NULL) + err(1, "realloc"); + appends[appendx].type = AP_FILE; + appends[appendx].s = cp->t; + appends[appendx].len = strlen(cp->t); + appendx++; + break; + case 's': + sdone |= substitute(cp); + break; + case 't': + if (sdone) { + sdone = 0; + cp = cp->u.c; + goto redirect; + } + break; + case 'w': + if (pd) + break; + if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) + == -1) + err(1, "%s", cp->t); + if (write(cp->u.fd, ps, psl) != (ssize_t)psl || + write(cp->u.fd, "\n", 1) != 1) + err(1, "%s", cp->t); + break; + case 'x': + /* + * If the hold space is null, make it empty + * but not null. Otherwise the pattern space + * will become null after the swap, which is + * an abnormal condition. + */ + if (hs == NULL) + cspace(&HS, "", 0, REPLACE); + tspace = PS; + PS = HS; + HS = tspace; + break; + case 'y': + if (pd || psl == 0) + break; + do_tr(cp->u.y); + break; + case ':': + case '}': + break; + case '=': + (void) fprintf(outfile, "%lu\n", linenum); + } + cp = cp->next; + } /* for all cp */ + +new: if (!nflag && !pd) + OUT(); + flush_appends(); + } /* for all lines */ +} + +/* + * TRUE if the address passed matches the current program state + * (lastline, linenumber, ps). + */ +#define MATCH(a) \ + ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ + (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()) + +/* + * Return TRUE if the command applies to the current line. Sets the start + * line for process ranges. Interprets the non-select (``!'') flag. 
+ */ +static int +applies(struct s_command *cp) +{ + int r; + + lastaddr = 0; + if (cp->a1 == NULL && cp->a2 == NULL) + r = 1; + else if (cp->a2) + if (cp->startline > 0) { + if (MATCH(cp->a2)) { + cp->startline = 0; + lastaddr = 1; + r = 1; + } else if (linenum - cp->startline <= cp->a2->u.l) + r = 1; + else if ((cp->a2->type == AT_LINE && + linenum > cp->a2->u.l) || + (cp->a2->type == AT_RELLINE && + linenum - cp->startline > cp->a2->u.l)) { + /* + * We missed the 2nd address due to a branch, + * so just close the range and return false. + */ + cp->startline = 0; + r = 0; + } else + r = 1; + } else if (MATCH(cp->a1)) { + /* + * If the second address is a number less than or + * equal to the line number first selected, only + * one line shall be selected. + * -- POSIX 1003.2 + * Likewise if the relative second line address is zero. + */ + if ((cp->a2->type == AT_LINE && + linenum >= cp->a2->u.l) || + (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0)) + lastaddr = 1; + else { + cp->startline = linenum; + } + r = 1; + } else + r = 0; + else + r = MATCH(cp->a1); + return (cp->nonsel ? ! r : r); +} + +/* + * Reset the sed processor to its initial state. + */ +void +resetstate(void) +{ + struct s_command *cp; + + /* + * Reset all in-range markers. + */ + for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next) + if (cp->a2) + cp->startline = 0; + + /* + * Clear out the hold space. + */ + cspace(&HS, "", 0, REPLACE); +} + +/* + * substitute -- + * Do substitutions in the pattern space. Currently, we build a + * copy of the new pattern space in the substitute space structure + * and then swap them. 
+ */ +static int +substitute(struct s_command *cp) +{ + SPACE tspace; + regex_t *re; + regoff_t re_off, slen; + int lastempty, n; + char *s; + + s = ps; + re = cp->u.s->re; + if (re == NULL) { + if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { + linenum = cp->u.s->linenum; + fatal(_("\\%u not defined in the RE"), + cp->u.s->maxbref); + } + } + if (!regexec_e(re, s, 0, 0, psl)) + return (0); + + SS.len = 0; /* Clean substitute space. */ + slen = psl; + n = cp->u.s->n; + lastempty = 1; + + switch (n) { + case 0: /* Global */ + do { + if (lastempty || match[0].rm_so != match[0].rm_eo) { + /* Locate start of replaced string. */ + re_off = match[0].rm_so; + /* Copy leading retained string. */ + cspace(&SS, s, re_off, APPEND); + /* Add in regular expression. */ + regsub(&SS, s, cp->u.s->new); + } + + /* Move past this match. */ + if (match[0].rm_so != match[0].rm_eo) { + s += match[0].rm_eo; + slen -= match[0].rm_eo; + lastempty = 0; + } else { + if (match[0].rm_so < slen) + cspace(&SS, s + match[0].rm_so, 1, + APPEND); + s += match[0].rm_so + 1; + slen -= match[0].rm_so + 1; + lastempty = 1; + } + } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); + /* Copy trailing retained string. */ + if (slen > 0) + cspace(&SS, s, slen, APPEND); + break; + default: /* Nth occurrence */ + while (--n) { + if (match[0].rm_eo == match[0].rm_so) + match[0].rm_eo = match[0].rm_so + 1; + s += match[0].rm_eo; + slen -= match[0].rm_eo; + if (slen < 0) + return (0); + if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) + return (0); + } + /* FALLTHROUGH */ + case 1: /* 1st occurrence */ + /* Locate start of replaced string. */ + re_off = match[0].rm_so + ((uintptr_t)s - (uintptr_t)ps); + /* Copy leading retained string. */ + cspace(&SS, ps, re_off, APPEND); + /* Add in regular expression. */ + regsub(&SS, s, cp->u.s->new); + /* Copy trailing retained string. 
*/ + s += match[0].rm_eo; + slen -= match[0].rm_eo; + cspace(&SS, s, slen, APPEND); + break; + } + + /* + * Swap the substitute space and the pattern space, and make sure + * that any leftover pointers into stdio memory get lost. + */ + tspace = PS; + PS = SS; + SS = tspace; + SS.space = SS.back; + + /* Handle the 'p' flag. */ + if (cp->u.s->p) + OUT(); + + /* Handle the 'w' flag. */ + if (cp->u.s->wfile && !pd) { + if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, + O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 0666)) == -1) + err(1, "%s", cp->u.s->wfile); + if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl || + write(cp->u.s->wfd, "\n", 1) != 1) + err(1, "%s", cp->u.s->wfile); + } + return (1); +} + +/* + * do_tr -- + * Perform translation ('y' command) in the pattern space. + */ +static void +do_tr(struct s_tr *y) +{ + SPACE tmp; + char c, *p; + size_t clen, left; + int i; + + if (MB_CUR_MAX == 1) { + /* + * Single-byte encoding: perform in-place translation + * of the pattern space. + */ + for (p = ps; p < &ps[psl]; p++) + *p = y->bytetab[(uchar_t)*p]; + } else { + /* + * Multi-byte encoding: perform translation into the + * translation space, then swap the translation and + * pattern spaces. + */ + /* Clean translation space. */ + YS.len = 0; + for (p = ps, left = psl; left > 0; p += clen, left -= clen) { + if ((c = y->bytetab[(uchar_t)*p]) != '\0') { + cspace(&YS, &c, 1, APPEND); + clen = 1; + continue; + } + for (i = 0; i < y->nmultis; i++) + if (left >= y->multis[i].fromlen && + memcmp(p, y->multis[i].from, + y->multis[i].fromlen) == 0) + break; + if (i < y->nmultis) { + cspace(&YS, y->multis[i].to, + y->multis[i].tolen, APPEND); + clen = y->multis[i].fromlen; + } else { + cspace(&YS, p, 1, APPEND); + clen = 1; + } + } + /* Swap the translation space and the pattern space. */ + tmp = PS; + PS = YS; + YS = tmp; + YS.space = YS.back; + } +} + +/* + * Flush append requests. 
Always called before reading a line, + * therefore it also resets the substitution done (sdone) flag. + */ +static void +flush_appends(void) +{ + FILE *f; + int count, i; + char buf[8 * 1024]; + + for (i = 0; i < appendx; i++) + switch (appends[i].type) { + case AP_STRING: + (void) fwrite(appends[i].s, sizeof (char), + appends[i].len, outfile); + break; + case AP_FILE: + /* + * Read files probably shouldn't be cached. Since + * it's not an error to read a non-existent file, + * it's possible that another program is interacting + * with the sed script through the filesystem. It + * would be truly bizarre, but possible. It's probably + * not that big a performance win, anyhow. + */ + if ((f = fopen(appends[i].s, "r")) == NULL) + break; + while ((count = + fread(buf, sizeof (char), sizeof (buf), f))) + (void) fwrite(buf, sizeof (char), count, + outfile); + (void) fclose(f); + break; + } + if (ferror(outfile)) + errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); + appendx = sdone = 0; +} + +static void +lputs(char *s, size_t len) +{ + static const char escapes[] = "\\\a\b\f\r\t\v"; + int c, col, width; + const char *p; + struct winsize win; + static int termwidth = -1; + size_t clen, i; + wchar_t wc; + mbstate_t mbs; + + if (outfile != stdout) + termwidth = 60; + if (termwidth == -1) { + if (((p = getenv("COLUMNS")) != NULL) && (*p != '\0')) + termwidth = atoi(p); + else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && + win.ws_col > 0) + termwidth = win.ws_col; + else + termwidth = 60; + } + if (termwidth <= 0) + termwidth = 1; + + (void) memset(&mbs, 0, sizeof (mbs)); + col = 0; + while (len != 0) { + clen = mbrtowc(&wc, s, len, &mbs); + if (clen == 0) + clen = 1; + if (clen == (size_t)-1 || clen == (size_t)-2) { + wc = (unsigned char)*s; + clen = 1; + (void) memset(&mbs, 0, sizeof (mbs)); + } + if (wc == '\n') { + if (col + 1 >= termwidth) + (void) fprintf(outfile, "\\\n"); + (void) fputc('$', outfile); + (void) fputc('\n', outfile); + col = 0; + } else 
if (iswprint(wc)) { + width = wcwidth(wc); + if (col + width >= termwidth) { + (void) fprintf(outfile, "\\\n"); + col = 0; + } + (void) fwrite(s, 1, clen, outfile); + col += width; + } else if (wc != L'\0' && (c = wctob(wc)) != EOF && + (p = strchr(escapes, c)) != NULL) { + if (col + 2 >= termwidth) { + (void) fprintf(outfile, "\\\n"); + col = 0; + } + (void) fprintf(outfile, "\\%c", + "\\abfrtv"[(uintptr_t)p - (uintptr_t)escapes]); + col += 2; + } else { + if (col + 4 * clen >= (unsigned)termwidth) { + (void) fprintf(outfile, "\\\n"); + col = 0; + } + for (i = 0; i < clen; i++) + (void) fprintf(outfile, "\\%03o", + (int)(unsigned char)s[i]); + col += 4 * clen; + } + s += clen; + len -= clen; + } + if (col + 1 >= termwidth) + (void) fprintf(outfile, "\\\n"); + (void) fputc('$', outfile); + (void) fputc('\n', outfile); + if (ferror(outfile)) + errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); +} + +static int +regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, + size_t slen) +{ + int eval; + + if (preg == NULL) { + if (defpreg == NULL) + fatal(_("first RE may not be empty")); + } else + defpreg = preg; + + /* Set anchors */ + match[0].rm_so = 0; + match[0].rm_eo = slen; + + eval = regexec(defpreg, string, + nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); + switch (eval) { + case 0: + return (1); + case REG_NOMATCH: + return (0); + } + fatal(_("RE error: %s"), strregerror(eval, defpreg)); + return (0); +} + +/* + * regsub - perform substitutions after a regexp match + * Based on a routine by Henry Spencer + */ +static void +regsub(SPACE *sp, char *string, char *src) +{ + int len, no; + char c, *dst; + +#define NEEDSP(reqlen) \ + /* XXX What is the +1 for? 
*/ \ + if (sp->len + (reqlen) + 1 >= sp->blen) { \ + sp->blen += (reqlen) + 1024; \ + if ((sp->back = realloc(sp->back, sp->blen)) == NULL) \ + err(1, "realloc"); \ + sp->space = sp->back; \ + dst = sp->space + sp->len; \ + } + + dst = sp->space + sp->len; + while ((c = *src++) != '\0') { + if (c == '&') + no = 0; + else if (c == '\\' && isdigit((unsigned char)*src)) + no = *src++ - '0'; + else + no = -1; + if (no < 0) { /* Ordinary character. */ + if (c == '\\' && (*src == '\\' || *src == '&')) + c = *src++; + NEEDSP(1); + *dst++ = c; + ++sp->len; + } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { + len = match[no].rm_eo - match[no].rm_so; + NEEDSP(len); + (void) memmove(dst, string + match[no].rm_so, len); + dst += len; + sp->len += len; + } + } + NEEDSP(1); + *dst = '\0'; +} + +/* + * cspace -- + * Concatenate space: append the source space to the destination space, + * allocating new space as necessary. + */ +void +cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) +{ + size_t tlen; + + /* Make sure SPACE has enough memory and ramp up quickly. */ + tlen = sp->len + len + 1; + if (tlen > sp->blen) { + sp->blen = tlen + 1024; + if ((sp->space = sp->back = realloc(sp->back, sp->blen)) == + NULL) + err(1, "realloc"); + } + + if (spflag == REPLACE) + sp->len = 0; + + (void) memmove(sp->space + sp->len, p, len); + + sp->space[sp->len += len] = '\0'; +} + +/* + * Close all cached opened files and report any errors + */ +void +cfclose(struct s_command *cp, struct s_command *end) +{ + + for (; cp != end; cp = cp->next) + switch (cp->code) { + case 's': + if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) + err(1, "%s", cp->u.s->wfile); + cp->u.s->wfd = -1; + break; + case 'w': + if (cp->u.fd != -1 && close(cp->u.fd)) + err(1, "%s", cp->t); + cp->u.fd = -1; + break; + case '{': + cfclose(cp->u.c, cp->next); + break; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/sed.1 Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,636 @@ +.\" Copyright (c) 1992, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" @(#)sed.1 8.2 (Berkeley) 12/30/93 +.\" $FreeBSD$ +.\" +.Dd May 24, 2009 +.Dt SED 1 +.Os +.Sh NAME +.Nm sed +.Nd stream editor +.Sh SYNOPSIS +.Nm +.Op Fl Ealnr +.Ar command +.Op Ar +.Nm +.Op Fl Ealnr +.Op Fl e Ar command +.Op Fl f Ar command_file +.Op Fl I Ar extension +.Op Fl i Ar extension +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility reads the specified files, or the standard input if no files +are specified, modifying the input as specified by a list of commands. +The input is then written to the standard output. +.Pp +A single command may be specified as the first argument to +.Nm . +Multiple commands may be specified by using the +.Fl e +or +.Fl f +options. +All commands are applied to the input in the order they are specified +regardless of their origin. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl E +Interpret regular expressions as extended (modern) regular expressions +rather than basic regular expressions (BRE's). +The +.Xr re_format 7 +manual page fully describes both formats. +.It Fl a +The files listed as parameters for the +.Dq w +functions are created (or truncated) before any processing begins, +by default. +The +.Fl a +option causes +.Nm +to delay opening each file until a command containing the related +.Dq w +function is applied to a line of input. +.It Fl e Ar command +Append the editing commands specified by the +.Ar command +argument +to the list of commands. +.It Fl f Ar command_file +Append the editing commands found in the file +.Ar command_file +to the list of commands. +The editing commands should each be listed on a separate line. +.It Fl I Ar extension +Edit files in-place, saving backups with the specified +.Ar extension . +If a zero-length +.Ar extension +is given, no backup will be saved. +It is not recommended to give a zero-length +.Ar extension +when in-place editing files, as you risk corruption or partial content +in situations where disk space is exhausted, etc. 
+.Pp +Note that in-place editing with +.Fl I +still takes place in a single continuous line address space covering +all files, although each file preserves its individuality instead of +forming one output stream. +The line counter is never reset between files, address ranges can span +file boundaries, and the +.Dq $ +address matches only the last line of the last file. +(See +.Sx "Sed Addresses" . ) +That can lead to unexpected results in many cases of in-place editing, +where using +.Fl i +is desired. +.It Fl i Ar extension +Edit files in-place similarly to +.Fl I , +but treat each file independently from other files. +In particular, line numbers in each file start at 1, +the +.Dq $ +address matches the last line of the current file, +and address ranges are limited to the current file. +(See +.Sx "Sed Addresses" . ) +The net result is as though each file were edited by a separate +.Nm +instance. +.It Fl l +Make output line buffered. +.It Fl n +By default, each line of input is echoed to the standard output after +all of the commands have been applied to it. +The +.Fl n +option suppresses this behavior. +.It Fl r +Same as +.Fl E +for compatibility with GNU sed. +.El +.Pp +The form of a +.Nm +command is as follows: +.Pp +.Dl [address[,address]]function[arguments] +.Pp +Whitespace may be inserted before the first address and the function +portions of the command. +.Pp +Normally, +.Nm +cyclically copies a line of input, not including its terminating newline +character, into a +.Em "pattern space" , +(unless there is something left after a +.Dq D +function), +applies all of the commands with addresses that select that pattern space, +copies the pattern space to the standard output, appending a newline, and +deletes the pattern space. +.Pp +Some of the functions use a +.Em "hold space" +to save all or part of the pattern space for subsequent retrieval. 
+.Sh "Sed Addresses" +An address is not required, but if specified must have one of the +following formats: +.Bl -bullet -offset indent +.It +a number that counts +input lines +cumulatively across input files (or in each file independently +if a +.Fl i +option is in effect); +.It +a dollar +.Pq Dq $ +character that addresses the last line of input (or the last line +of the current file if a +.Fl i +option was specified); +.It +a context address +that consists of a regular expression preceded and followed by a +delimiter. The closing delimiter can also optionally be followed by the +.Dq I +character, to indicate that the regular expression is to be matched +in a case-insensitive way. +.El +.Pp +A command line with no addresses selects every pattern space. +.Pp +A command line with one address selects all of the pattern spaces +that match the address. +.Pp +A command line with two addresses selects an inclusive range. +This +range starts with the first pattern space that matches the first +address. +The end of the range is the next following pattern space +that matches the second address. +If the second address is a number +less than or equal to the line number first selected, only that +line is selected. +The number in the second address may be prefixed with a +.Pq Dq \&+ +to specify the number of lines to match after the first pattern. +In the case when the second address is a context +address, +.Nm +does not re-match the second address against the +pattern space that matched the first address. +Starting at the +first line following the selected range, +.Nm +starts looking again for the first address. +.Pp +Editing commands can be applied to non-selected pattern spaces by use +of the exclamation character +.Pq Dq \&! +function. 
+.Sh "Sed Regular Expressions" +The regular expressions used in +.Nm , +by default, are basic regular expressions (BREs, see +.Xr re_format 7 +for more information), but extended (modern) regular expressions can be used +instead if the +.Fl E +flag is given. +In addition, +.Nm +has the following two additions to regular expressions: +.Pp +.Bl -enum -compact +.It +In a context address, any character other than a backslash +.Pq Dq \e +or newline character may be used to delimit the regular expression. +The opening delimiter needs to be preceded by a backslash +unless it is a slash. +For example, the context address +.Li \exabcx +is equivalent to +.Li /abc/ . +Also, putting a backslash character before the delimiting character +within the regular expression causes the character to be treated literally. +For example, in the context address +.Li \exabc\exdefx , +the RE delimiter is an +.Dq x +and the second +.Dq x +stands for itself, so that the regular expression is +.Dq abcxdef . +.Pp +.It +The escape sequence \en matches a newline character embedded in the +pattern space. +You cannot, however, use a literal newline character in an address or +in the substitute command. +.El +.Pp +One special feature of +.Nm +regular expressions is that they can default to the last regular +expression used. +If a regular expression is empty, i.e., just the delimiter characters +are specified, the last regular expression encountered is used instead. +The last regular expression is defined as the last regular expression +used as part of an address or substitute command, and at run-time, not +compile-time. +For example, the command +.Dq /abc/s//XXX/ +will substitute +.Dq XXX +for the pattern +.Dq abc . +.Sh "Sed Functions" +In the following list of commands, the maximum number of permissible +addresses for each command is indicated by [0addr], [1addr], or [2addr], +representing zero, one, or two addresses. +.Pp +The argument +.Em text +consists of one or more lines. 
+To embed a newline in the text, precede it with a backslash. +Other backslashes in text are deleted and the following character +taken literally. +.Pp +The +.Dq r +and +.Dq w +functions take an optional file parameter, which should be separated +from the function letter by white space. +Each file given as an argument to +.Nm +is created (or its contents truncated) before any input processing begins. +.Pp +The +.Dq b , +.Dq r , +.Dq s , +.Dq t , +.Dq w , +.Dq y , +.Dq \&! , +and +.Dq \&: +functions all accept additional arguments. +The following synopses indicate which arguments have to be separated from +the function letters by white space characters. +.Pp +Two of the functions take a function-list. +This is a list of +.Nm +functions separated by newlines, as follows: +.Bd -literal -offset indent +{ function + function + ... + function +} +.Ed +.Pp +The +.Dq { +can be preceded by white space and can be followed by white space. +The function can be preceded by white space. +The terminating +.Dq } +must be preceded by a newline or optional white space. +.Pp +.Bl -tag -width "XXXXXX" -compact +.It [2addr] function-list +Execute function-list only when the pattern space is selected. +.Pp +.It [1addr]a\e +.It text +Write +.Em text +to standard output immediately before each attempt to read a line of input, +whether by executing the +.Dq N +function or by beginning a new cycle. +.Pp +.It [2addr]b[label] +Branch to the +.Dq \&: +function with the specified label. +If the label is not specified, branch to the end of the script. +.Pp +.It [2addr]c\e +.It text +Delete the pattern space. +With 0 or 1 address or at the end of a 2-address range, +.Em text +is written to the standard output. +.Pp +.It [2addr]d +Delete the pattern space and start the next cycle. +.Pp +.It [2addr]D +Delete the initial segment of the pattern space through the first +newline character and start the next cycle. 
+.Pp +.It [2addr]g +Replace the contents of the pattern space with the contents of the +hold space. +.Pp +.It [2addr]G +Append a newline character followed by the contents of the hold space +to the pattern space. +.Pp +.It [2addr]h +Replace the contents of the hold space with the contents of the +pattern space. +.Pp +.It [2addr]H +Append a newline character followed by the contents of the pattern space +to the hold space. +.Pp +.It [1addr]i\e +.It text +Write +.Em text +to the standard output. +.Pp +.It [2addr]l +(The letter ell.) +Write the pattern space to the standard output in a visually unambiguous +form. +This form is as follows: +.Pp +.Bl -tag -width "carriage-returnXX" -offset indent -compact +.It backslash +\e\e +.It alert +\ea +.It backspace +\eb +.It form-feed +\ef +.It carriage-return +\er +.It tab +\et +.It vertical tab +\ev +.El +.Pp +Nonprintable characters are written as three-digit octal numbers (with a +preceding backslash) for each byte in the character (most significant byte +first). +Long lines are folded, with the point of folding indicated by displaying +a backslash followed by a newline. +The end of each line is marked with a +.Dq $ . +.Pp +.It [2addr]n +Write the pattern space to the standard output if the default output has +not been suppressed, and replace the pattern space with the next line of +input. +.Pp +.It [2addr]N +Append the next line of input to the pattern space, using an embedded +newline character to separate the appended material from the original +contents. +Note that the current line number changes. +.Pp +.It [2addr]p +Write the pattern space to standard output. +.Pp +.It [2addr]P +Write the pattern space, up to the first newline character, to the +standard output. +.Pp +.It [1addr]q +Branch to the end of the script and quit without starting a new cycle. +.Pp +.It [1addr]r file +Copy the contents of +.Em file +to the standard output immediately before the next attempt to read a +line of input. 
+If +.Em file +cannot be read for any reason, it is silently ignored and no error +condition is set. +.Pp +.It [2addr]s/regular expression/replacement/flags +Substitute the replacement string for the first instance of the regular +expression in the pattern space. +Any character other than backslash or newline can be used instead of +a slash to delimit the RE and the replacement. +Within the RE and the replacement, the RE delimiter itself can be used as +a literal character if it is preceded by a backslash. +.Pp +An ampersand +.Pq Dq & +appearing in the replacement is replaced by the string matching the RE. +The special meaning of +.Dq & +in this context can be suppressed by preceding it by a backslash. +The string +.Dq \e# , +where +.Dq # +is a digit, is replaced by the text matched +by the corresponding backreference expression (see +.Xr re_format 7 ) . +.Pp +A line can be split by substituting a newline character into it. +To specify a newline character in the replacement string, precede it with +a backslash. +.Pp +The value of +.Em flags +in the substitute function is zero or more of the following: +.Bl -tag -width "XXXXXX" -offset indent +.It Ar N +Make the substitution only for the +.Ar N Ns 'th +occurrence of the regular expression in the pattern space. +.It g +Make the substitution for all non-overlapping matches of the +regular expression, not just the first one. +.It p +Write the pattern space to standard output if a replacement was made. +If the replacement string is identical to that which it replaces, it +is still considered to have been a replacement. +.It w Em file +Append the pattern space to +.Em file +if a replacement was made. +If the replacement string is identical to that which it replaces, it +is still considered to have been a replacement. +.It I +Match the regular expression in a case-insensitive way. 
+.El +.Pp +.It [2addr]t [label] +Branch to the +.Dq \&: +function bearing the label if any substitutions have been made since the +most recent reading of an input line or execution of a +.Dq t +function. +If no label is specified, branch to the end of the script. +.Pp +.It [2addr]w Em file +Append the pattern space to the +.Em file . +.Pp +.It [2addr]x +Swap the contents of the pattern and hold spaces. +.Pp +.It [2addr]y/string1/string2/ +Replace all occurrences of characters in +.Em string1 +in the pattern space with the corresponding characters from +.Em string2 . +Any character other than a backslash or newline can be used instead of +a slash to delimit the strings. +Within +.Em string1 +and +.Em string2 , +a backslash followed by any character other than a newline is that literal +character, and a backslash followed by an ``n'' is replaced by a newline +character. +.Pp +.It [2addr]!function +.It [2addr]!function-list +Apply the function or function-list only to the lines that are +.Em not +selected by the address(es). +.Pp +.It [0addr]:label +This function does nothing; it bears a label to which the +.Dq b +and +.Dq t +commands may branch. +.Pp +.It [1addr]= +Write the line number to the standard output followed by a newline +character. +.Pp +.It [0addr] +Empty lines are ignored. +.Pp +.It [0addr]# +The +.Dq # +and the remainder of the line are ignored (treated as a comment), with +the single exception that if the first two characters in the file are +.Dq #n , +the default output is suppressed. +This is the same as specifying the +.Fl n +option on the command line. +.El +.Sh ENVIRONMENT +The +.Ev COLUMNS , LANG , LC_ALL , LC_CTYPE +and +.Ev LC_COLLATE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr awk 1 , +.Xr ed 1 , +.Xr grep 1 , +.Xr regex 3 , +.Xr re_format 7 +.Sh STANDARDS +The +.Nm +utility is expected to be a superset of the +.St -p1003.2 +specification. 
+.Pp +The +.Fl E , I , a +and +.Fl i +options, the prefixing +.Dq \&+ +in the second member of an address range, +as well as the +.Dq I +flag to the address regular expression and substitution command are +non-standard +.Fx +extensions and may not be available on other operating systems. +.Sh HISTORY +A +.Nm +command, written by +.An L. E. McMahon , +appeared in +.At v7 . +.Sh AUTHORS +.An "Diomidis D. Spinellis" Aq dds@FreeBSD.org +.Sh BUGS +Multibyte characters containing a byte with value 0x5C +.Tn ( ASCII +.Ql \e ) +may be incorrectly treated as line continuation characters in arguments to the +.Dq a , +.Dq c +and +.Dq i +commands. +Multibyte characters cannot be used as delimiters with the +.Dq s +and +.Dq y +commands.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/sed/sed.txt Wed Dec 15 17:12:35 2010 -0800 @@ -0,0 +1,391 @@ +SED(1) BSD General Commands Manual SED(1) + +NAME + sed -- stream editor + +SYNOPSIS + sed [-Ealnr] command [file ...] + sed [-Ealnr] [-e command] [-f command_file] [-I extension] [-i extension] + [file ...] + +DESCRIPTION + The sed utility reads the specified files, or the standard input if no + files are specified, modifying the input as specified by a list of com- + mands. The input is then written to the standard output. + + A single command may be specified as the first argument to sed. Multiple + commands may be specified by using the -e or -f options. All commands + are applied to the input in the order they are specified regardless of + their origin. + + The following options are available: + + -E Interpret regular expressions as extended (modern) regular + expressions rather than basic regular expressions (BRE's). The + re_format(7) manual page fully describes both formats. + + -a The files listed as parameters for the ``w'' functions are cre- + ated (or truncated) before any processing begins, by default. + The -a option causes sed to delay opening each file until a com- + mand containing the related ``w'' function is applied to a line + of input. + + -e command + Append the editing commands specified by the command argument to + the list of commands. + + -f command_file + Append the editing commands found in the file command_file to the + list of commands. The editing commands should each be listed on + a separate line. + + -I extension + Edit files in-place, saving backups with the specified extension. + If a zero-length extension is given, no backup will be saved. 
It + is not recommended to give a zero-length extension when in-place + editing files, as you risk corruption or partial content in situ- + ations where disk space is exhausted, etc. + + Note that in-place editing with -I still takes place in a single + continuous line address space covering all files, although each + file preserves its individuality instead of forming one output + stream. The line counter is never reset between files, address + ranges can span file boundaries, and the ``$'' address matches + only the last line of the last file. (See Sed Addresses.) That + can lead to unexpected results in many cases of in-place editing, + where using -i is desired. + + -i extension + Edit files in-place similarly to -I, but treat each file indepen- + dently from other files. In particular, line numbers in each + file start at 1, the ``$'' address matches the last line of the + current file, and address ranges are limited to the current file. + (See Sed Addresses.) The net result is as though each file were + edited by a separate sed instance. + + -l Make output line buffered. + + -n By default, each line of input is echoed to the standard output + after all of the commands have been applied to it. The -n option + suppresses this behavior. + + -r Same as -E for compatibility with GNU sed. + + The form of a sed command is as follows: + + [address[,address]]function[arguments] + + Whitespace may be inserted before the first address and the function por- + tions of the command. + + Normally, sed cyclically copies a line of input, not including its termi- + nating newline character, into a pattern space, (unless there is some- + thing left after a ``D'' function), applies all of the commands with + addresses that select that pattern space, copies the pattern space to the + standard output, appending a newline, and deletes the pattern space. 
+ + Some of the functions use a hold space to save all or part of the pattern + space for subsequent retrieval. + +Sed Addresses + An address is not required, but if specified must have one of the follow- + ing formats: + + o a number that counts input lines cumulatively across input + files (or in each file independently if a -i option is in + effect); + + o a dollar (``$'') character that addresses the last line of + input (or the last line of the current file if a -i option was + specified); + + o a context address that consists of a regular expression pre- + ceded and followed by a delimiter. The closing delimiter can + also optionally be followed by the ``I'' character, to indicate + that the regular expression is to be matched in a case-insensi- + tive way. + + A command line with no addresses selects every pattern space. + + A command line with one address selects all of the pattern spaces that + match the address. + + A command line with two addresses selects an inclusive range. This range + starts with the first pattern space that matches the first address. The + end of the range is the next following pattern space that matches the + second address. If the second address is a number less than or equal to + the line number first selected, only that line is selected. The number + in the second address may be prefixed with a (``+'') to specify the num- + ber of lines to match after the first pattern. In the case when the sec- + ond address is a context address, sed does not re-match the second + address against the pattern space that matched the first address. Start- + ing at the first line following the selected range, sed starts looking + again for the first address. + + Editing commands can be applied to non-selected pattern spaces by use of + the exclamation character (``!'') function. 
+ +Sed Regular Expressions + The regular expressions used in sed, by default, are basic regular + expressions (BREs, see re_format(7) for more information), but extended + (modern) regular expressions can be used instead if the -E flag is given. + In addition, sed has the following two additions to regular expressions: + + 1. In a context address, any character other than a backslash (``\'') + or newline character may be used to delimit the regular expression. + The opening delimiter needs to be preceded by a backslash unless it + is a slash. For example, the context address \xabcx is equivalent + to /abc/. Also, putting a backslash character before the delimiting + character within the regular expression causes the character to be + treated literally. For example, in the context address \xabc\xdefx, + the RE delimiter is an ``x'' and the second ``x'' stands for itself, + so that the regular expression is ``abcxdef''. + + 2. The escape sequence \n matches a newline character embedded in the + pattern space. You cannot, however, use a literal newline character + in an address or in the substitute command. + + One special feature of sed regular expressions is that they can default + to the last regular expression used. If a regular expression is empty, + i.e., just the delimiter characters are specified, the last regular + expression encountered is used instead. The last regular expression is + defined as the last regular expression used as part of an address or sub- + stitute command, and at run-time, not compile-time. For example, the + command ``/abc/s//XXX/'' will substitute ``XXX'' for the pattern ``abc''. + +Sed Functions + In the following list of commands, the maximum number of permissible + addresses for each command is indicated by [0addr], [1addr], or [2addr], + representing zero, one, or two addresses. + + The argument _t_e_x_t consists of one or more lines. To embed a newline in + the text, precede it with a backslash. 
Other backslashes in text are + deleted and the following character taken literally. + + The ``r'' and ``w'' functions take an optional file parameter, which + should be separated from the function letter by white space. Each file + given as an argument to sed is created (or its contents truncated) before + any input processing begins. + + The ``b'', ``r'', ``s'', ``t'', ``w'', ``y'', ``!'', and ``:'' functions + all accept additional arguments. The following synopses indicate which + arguments have to be separated from the function letters by white space + characters. + + Two of the functions take a function-list. This is a list of sed func- + tions separated by newlines, as follows: + + { function + function + ... + function + } + + The ``{'' can be preceded by white space and can be followed by white + space. The function can be preceded by white space. The terminating + ``}'' must be preceded by a newline or optional white space. + + [2addr] function-list + Execute function-list only when the pattern space is selected. + + [1addr]a\ + text Write _t_e_x_t to standard output immediately before each attempt to + read a line of input, whether by executing the ``N'' function or + by beginning a new cycle. + + [2addr]b[label] + Branch to the ``:'' function with the specified label. If the + label is not specified, branch to the end of the script. + + [2addr]c\ + text Delete the pattern space. With 0 or 1 address or at the end of a + 2-address range, _t_e_x_t is written to the standard output. + + [2addr]d + Delete the pattern space and start the next cycle. + + [2addr]D + Delete the initial segment of the pattern space through the first + newline character and start the next cycle. + + [2addr]g + Replace the contents of the pattern space with the contents of + the hold space. + + [2addr]G + Append a newline character followed by the contents of the hold + space to the pattern space. 
+ + [2addr]h + Replace the contents of the hold space with the contents of the + pattern space. + + [2addr]H + Append a newline character followed by the contents of the pat- + tern space to the hold space. + + [1addr]i\ + text Write _t_e_x_t to the standard output. + + [2addr]l + (The letter ell.) Write the pattern space to the standard output + in a visually unambiguous form. This form is as follows: + + backslash \\ + alert \a + form-feed \f + carriage-return \r + tab \t + vertical tab \v + + Nonprintable characters are written as three-digit octal numbers + (with a preceding backslash) for each byte in the character (most + significant byte first). Long lines are folded, with the point + of folding indicated by displaying a backslash followed by a new- + line. The end of each line is marked with a ``$''. + + [2addr]n + Write the pattern space to the standard output if the default + output has not been suppressed, and replace the pattern space + with the next line of input. + + [2addr]N + Append the next line of input to the pattern space, using an + embedded newline character to separate the appended material from + the original contents. Note that the current line number + changes. + + [2addr]p + Write the pattern space to standard output. + + [2addr]P + Write the pattern space, up to the first newline character, to the + standard output. + + [1addr]q + Branch to the end of the script and quit without starting a new + cycle. + + [1addr]r file + Copy the contents of _f_i_l_e to the standard output immediately + before the next attempt to read a line of input. If _f_i_l_e cannot + be read for any reason, it is silently ignored and no error con- + dition is set. + + [2addr]s/regular expression/replacement/flags + Substitute the replacement string for the first instance of the + regular expression in the pattern space. Any character other + than backslash or newline can be used instead of a slash to + delimit the RE and the replacement. 
Within the RE and the + replacement, the RE delimiter itself can be used as a literal + character if it is preceded by a backslash. + + An ampersand (``&'') appearing in the replacement is replaced by + the string matching the RE. The special meaning of ``&'' in this + context can be suppressed by preceding it by a backslash. The + string ``\#'', where ``#'' is a digit, is replaced by the text + matched by the corresponding backreference expression (see + re_format(7)). + + A line can be split by substituting a newline character into it. + To specify a newline character in the replacement string, precede + it with a backslash. + + The value of _f_l_a_g_s in the substitute function is zero or more of + the following: + + _N Make the substitution only for the _N'th occurrence + of the regular expression in the pattern space. + + g Make the substitution for all non-overlapping + matches of the regular expression, not just the + first one. + + p Write the pattern space to standard output if a + replacement was made. If the replacement string is + identical to that which it replaces, it is still + considered to have been a replacement. + + w _f_i_l_e Append the pattern space to _f_i_l_e if a replacement + was made. If the replacement string is identical + to that which it replaces, it is still considered + to have been a replacement. + + I Match the regular expression in a case-insensitive + way. + + [2addr]t [label] + Branch to the ``:'' function bearing the label if any substitu- + tions have been made since the most recent reading of an input + line or execution of a ``t'' function. If no label is specified, + branch to the end of the script. + + [2addr]w _f_i_l_e + Append the pattern space to the _f_i_l_e. + + [2addr]x + Swap the contents of the pattern and hold spaces. + + [2addr]y/string1/string2/ + Replace all occurrences of characters in _s_t_r_i_n_g_1 in the pattern + space with the corresponding characters from _s_t_r_i_n_g_2. 
Any char- + acter other than a backslash or newline can be used instead of a + slash to delimit the strings. Within _s_t_r_i_n_g_1 and _s_t_r_i_n_g_2, a + backslash followed by any character other than a newline is that + literal character, and a backslash followed by an ``n'' is + replaced by a newline character. + + [2addr]!function + [2addr]!function-list + Apply the function or function-list only to the lines that are + _n_o_t selected by the address(es). + + [0addr]:label + This function does nothing; it bears a label to which the ``b'' + and ``t'' commands may branch. + + [1addr]= + Write the line number to the standard output followed by a new- + line character. + + [0addr] + Empty lines are ignored. + + [0addr]# + The ``#'' and the remainder of the line are ignored (treated as a + comment), with the single exception that if the first two charac- + ters in the file are ``#n'', the default output is suppressed. + This is the same as specifying the -n option on the command line. + +ENVIRONMENT + The COLUMNS, LANG, LC_ALL, LC_CTYPE and LC_COLLATE environment variables + affect the execution of sed as described in environ(7). + +EXIT STATUS + The sed utility exits 0 on success, and >0 if an error occurs. + +SEE ALSO + awk(1), ed(1), grep(1), regex(3), re_format(7) + +STANDARDS + The sed utility is expected to be a superset of the IEEE Std 1003.2 + (``POSIX.2'') specification. + + The -E, -I, -a and -i options, the prefixing ``+'' in the second member + of an address range, as well as the ``I'' flag to the address regular + expression and substitution command are non-standard FreeBSD extensions + and may not be available on other operating systems. + +HISTORY + A sed command, written by L. E. McMahon, appeared in Version 7 AT&T UNIX. + +AUTHORS + Diomidis D. 
Spinellis <dds@FreeBSD.org> + +BUGS + Multibyte characters containing a byte with value 0x5C (ASCII `\') may be + incorrectly treated as line continuation characters in arguments to the + ``a'', ``c'' and ``i'' commands. Multibyte characters cannot be used as + delimiters with the ``s'' and ``y'' commands. + +BSD May 24, 2009 BSD
--- a/usr/src/head/regex.h Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/head/regex.h Wed Dec 15 17:12:35 2010 -0800 @@ -88,6 +88,7 @@ #define REG_DUMP 0x2000 #define REG_PEND 0x4000 #define REG_NOSPEC 0x8000 +#define REG_STARTEND 0x10000 /* internal flags */ #define REG_MUST 0x100 /* check for regmust substring */
--- a/usr/src/lib/libc/port/locale/engine.c Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/lib/libc/port/locale/engine.c Wed Dec 15 17:12:35 2010 -0800 @@ -172,7 +172,7 @@ /* simplify the situation where possible */ if (g->cflags&REG_NOSUB) nmatch = 0; -#ifdef REG_STARTEND + if (eflags&REG_STARTEND) { start = string + pmatch[0].rm_so; stop = string + pmatch[0].rm_eo; @@ -180,10 +180,7 @@ start = string; stop = start + strlen(start); } -#else - start = string; - stop = start + strlen(start); -#endif + if (stop < start) return (REG_EFATAL);
--- a/usr/src/lib/libc/port/locale/regcomp.c Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/lib/libc/port/locale/regcomp.c Wed Dec 15 17:12:35 2010 -0800 @@ -392,7 +392,17 @@ case '\\': (void) REQUIRE(MORE(), REG_EESCAPE); wc = WGETNEXT(); - ordinary(p, wc); + switch (wc) { + case '<': + EMIT(OBOW, 0); + break; + case '>': + EMIT(OEOW, 0); + break; + default: + ordinary(p, wc); + break; + } break; case '{': /* okay as ordinary except if digit follows */ (void) REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); @@ -545,6 +555,12 @@ case '[': p_bracket(p); break; + case BACKSL|'<': + EMIT(OBOW, 0); + break; + case BACKSL|'>': + EMIT(OEOW, 0); + break; case BACKSL|'{': SETERROR(REG_BADRPT); break;
--- a/usr/src/pkg/manifests/SUNWcs.mf Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/pkg/manifests/SUNWcs.mf Wed Dec 15 17:12:35 2010 -0800 @@ -2560,6 +2560,8 @@ license=usr/src/cmd/mt/THIRDPARTYLICENSE license usr/src/cmd/script/THIRDPARTYLICENSE \ license=usr/src/cmd/script/THIRDPARTYLICENSE +license usr/src/cmd/sed/THIRDPARTYLICENSE \ + license=usr/src/cmd/sed/THIRDPARTYLICENSE license usr/src/cmd/stat/vmstat/THIRDPARTYLICENSE \ license=usr/src/cmd/stat/vmstat/THIRDPARTYLICENSE license usr/src/cmd/tail/THIRDPARTYLICENSE \
--- a/usr/src/pkg/manifests/system-xopen-xcu4.mf Thu Dec 09 17:47:03 2010 -0800 +++ b/usr/src/pkg/manifests/system-xopen-xcu4.mf Wed Dec 15 17:12:35 2010 -0800 @@ -65,7 +65,6 @@ file path=usr/xpg4/bin/nohup mode=0555 file path=usr/xpg4/bin/pr mode=0555 file path=usr/xpg4/bin/rm mode=0555 -file path=usr/xpg4/bin/sed mode=0555 file path=usr/xpg4/bin/sort mode=0555 file path=usr/xpg4/bin/stty mode=0555 file path=usr/xpg4/bin/who mode=0555 @@ -99,6 +98,7 @@ link path=usr/xpg4/bin/od target=../../bin/od link path=usr/xpg4/bin/pfsh target=../../bin/pfexec link path=usr/xpg4/bin/read target=../../bin/alias +link path=usr/xpg4/bin/sed target=../../bin/sed link path=usr/xpg4/bin/sh target=../../bin/ksh93 link path=usr/xpg4/bin/tail target=../../bin/tail link path=usr/xpg4/bin/test target=../../bin/alias