diff usr/src/cmd/xargs/xargs.c @ 14179:f1ab3ae29311

672 xargs doesn't support -0 3356 xargs needlessly duplicates stdio buffering Reviewed by: Prasad Joshi <prasadjoshi124@gmail.com> Reviewed by: Gary Mills <gary_mills@fastmail.fm> Approved by: Robert Mustacchi <rm@joyent.com>
author Garrett D'Amore <garrett@dey-sys.com>
date Tue, 27 Aug 2013 13:00:09 -0700
parents f9a352c91678
children
line wrap: on
line diff
--- a/usr/src/cmd/xargs/xargs.c	Fri Aug 23 15:33:55 2013 -0400
+++ b/usr/src/cmd/xargs/xargs.c	Tue Aug 27 13:00:09 2013 -0700
@@ -19,6 +19,16 @@
  * CDDL HEADER END
  */
 /*
+ * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
+ *
+ * Portions of this file developed by DEY Storage Systems, Inc. are licensed
+ * under the terms of the Common Development and Distribution License (CDDL)
+ * version 1.0 only.  The use of subsequent versions of the License are
+ * is specifically prohibited unless those terms are not in conflict with
+ * version 1.0 of the License.  You can find this license on-line at
+ * http://www.illumos.org/license/CDDL
+ */
+/*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
@@ -27,8 +37,6 @@
 /*	  All Rights Reserved  	*/
 
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -62,11 +70,32 @@
 #define	QBUF_STARTLEN 255  /* start size of growable string buffer */
 #define	QBUF_INC 100	   /* how much to grow a growable string by */
 
+/* We use these macros to help make formatting look "consistent" */
+#define	EMSG(s)		ermsg(gettext(s "\n"))
+#define	EMSG2(s, a)	ermsg(gettext(s "\n"), a)
+#define	PERR(s)		perror(gettext("xargs: " s))
+
+/* Some common error messages */
+
+#define	LIST2LONG	"Argument list too long"
+#define	ARG2LONG	"A single argument was greater than %d bytes"
+#define	MALLOCFAIL	"Memory allocation failure"
+#define	CORRUPTFILE	"Corrupt input file"
+#define	WAITFAIL	"Wait failure"
+#define	CHILDSIG	"Child killed with signal %d"
+#define	CHILDFAIL	"Command could not continue processing data"
+#define	FORKFAIL	"Could not fork child"
+#define	EXECFAIL	"Could not exec command"
+#define	MISSQUOTE	"Missing quote"
+#define	BADESCAPE	"Incomplete escape"
+#define	IBUFOVERFLOW	"Insert buffer overflow"
+
+#define	_(x)	gettext(x)
+
 static wctype_t	blank;
 static char	*arglist[MAXARGS+1];
-static char	argbuf[BUFSIZE+1];
-static char	*next = argbuf;
-static char	*lastarg = "";
+static char	argbuf[BUFSIZE * 2 + 1];
+static char	lastarg[BUFSIZE + 1];
 static char	**ARGV = arglist;
 static char	*LEOF = "_";
 static char	*INSPAT = INSPAT_STR;
@@ -78,7 +107,6 @@
 	char	*p_skel;	/* ptr to arg template */
 } saveargv[MAXINSERTS];
 
-static off_t	file_offset = 0;
 static int	PROMPT = -1;
 static int	BUFLIM = BUFSIZE;
 static int	N_ARGS = 0;
@@ -92,28 +120,13 @@
 static int	LEGAL = FALSE;
 static int	TRACE = FALSE;
 static int	INSERT = FALSE;
+static int	ZERO = FALSE;
 static int	linesize = 0;
 static int	ibufsize = 0;
 static int	exitstat = 0;	/* our exit status			*/
 static int	mac;		/* modified argc, after parsing		*/
 static char	**mav;		/* modified argv, after parsing		*/
 static int	n_inserts;	/* # of insertions.			*/
-static int	inquote = 0;	/* processing a quoted string		*/
-static int	save_index = 0;
-
-/*
- * the pio structure is used to save any pending input before the
- * user replies to a prompt. the pending input is saved here,
- * for the appropriate processing later.
- */
-typedef struct pio {
-	struct pio *next;	/* next in stack			*/
-	char *start;		/* starting addr of the buffer		*/
-	char *cur;		/* ptr to current char in buf		*/
-	size_t length;		/* number of bytes remaining		*/
-} pio;
-
-static pio *queued_data = NULL;
 
 /* our usage message:							*/
 #define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
@@ -121,21 +134,16 @@
 	"[cmd [args ...]]\n"
 
 static int	echoargs();
-static int	getchr(void);
-static wchar_t	getwchr(void);
-static void	ungetwchr(wchar_t);
+static wint_t	getwchr(char *, size_t *);
 static int	lcall(char *sub, char **subargs);
-static int	xindex(char *as1, char *as2);
 static void	addibuf(struct inserts *p);
 static void	ermsg(char *messages, ...);
 static char	*addarg(char *arg);
-static char	*checklen(char *arg);
-static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
-static char	*getarg();
+static void	store_str(char **, char *, size_t);
+static char	*getarg(char *);
 static char	*insert(char *pattern, char *subst);
 static void	usage();
 static void	parseargs();
-static void	saveinput();
 
 int
 main(int argc, char **argv)
@@ -144,8 +152,9 @@
 	struct inserts *psave;
 	int c;
 	int	initsize;
-	char	*cmdname, *initbuf, **initlist;
-
+	char	*cmdname, **initlist;
+	char	*arg;
+	char	*next;
 
 	/* initialization */
 	blank = wctype("blank");
@@ -157,22 +166,26 @@
 #endif
 	(void) textdomain(TEXT_DOMAIN);
 	if (init_yes() < 0) {
-		ermsg(gettext(ERR_MSG_INIT_YES), strerror(errno));
+		ermsg(_(ERR_MSG_INIT_YES), strerror(errno));
 		exit(1);
 	}
 
 	parseargs(argc, argv);
 
 	/* handling all of xargs arguments:				*/
-	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
+	while ((c = getopt(mac, mav, "0tpe:E:I:i:L:l:n:s:x")) != EOF) {
 		switch (c) {
+		case '0':
+			ZERO = TRUE;
+			break;
+
 		case 't':	/* -t: turn trace mode on		*/
 			TRACE = TRUE;
 			break;
 
 		case 'p':	/* -p: turn on prompt mode.		*/
 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
-				perror(gettext("can't read from tty for -p"));
+				PERR("can't read from tty for -p");
 			} else {
 				TRACE = TRUE;
 			}
@@ -202,8 +215,8 @@
 			N_ARGS = 0;
 			INSPAT = optarg;
 			if (*optarg == '\0') {
-				ermsg(gettext(
-				    "Option requires an argument: -%c\n"), c);
+				ermsg(_("Option requires an argument: -%c\n"),
+				    c);
 			}
 			break;
 
@@ -245,8 +258,8 @@
 			N_ARGS = 0;
 			INSERT = FALSE;
 			if ((PER_LINE = atoi(optarg)) <= 0) {
-				ermsg(gettext("#lines must be positive "
-				    "int: %s\n"), optarg);
+				ermsg(_("#lines must be positive int: %s\n"),
+				    optarg);
 			}
 			break;
 
@@ -275,8 +288,8 @@
 			 * number *is* required here:
 			 */
 			if ((N_ARGS = atoi(optarg)) <= 0) {
-				ermsg(gettext("#args must be positive "
-				    "int: %s\n"), optarg);
+				ermsg(_("#args must be positive int: %s\n"),
+				    optarg);
 			} else {
 				LEGAL = DASHX || N_ARGS == 1;
 				INSERT = PER_LINE = FALSE;
@@ -286,9 +299,8 @@
 		case 's':	/* -s size: set max size of each arg list */
 			BUFLIM = atoi(optarg);
 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
-				ermsg(gettext(
-				    "0 < max-cmd-line-size <= %d: "
-				    "%s\n"), BUFSIZE, optarg);
+				ermsg(_("0 < max-cmd-line-size <= %d: %s\n"),
+				    BUFSIZE, optarg);
 			}
 			break;
 
@@ -300,9 +312,7 @@
 			/*
 			 * bad argument. complain and get ready to die.
 			 */
-			ERR = TRUE;
 			usage();
-
 			exit(2);
 			break;
 		}
@@ -313,7 +323,6 @@
 	 * we exit early.
 	 */
 	if (OK == FALSE) {
-		ERR = TRUE;
 		usage();
 		exit(2);
 	}
@@ -346,9 +355,9 @@
 			 * work to do:
 			 */
 			if (INSERT && ! ERR) {
-				if (xindex(*mav, INSPAT) != -1) {
+				if (strstr(*mav, INSPAT) != NULL) {
 					if (++n_inserts > MAXINSERTS) {
-						ermsg(gettext("too many args "
+						ermsg(_("too many args "
 						    "with %s\n"), INSPAT);
 						ERR = TRUE;
 					}
@@ -362,39 +371,77 @@
 
 	/* pick up args from standard input */
 
-	initbuf = next;
 	initlist = ARGV;
 	initsize = linesize;
+	lastarg[0] = '\0';
 
-	while (OK && MORE) {
+	while (OK) {
 		N_args = 0;
 		N_lines = 0;
-		next = initbuf;
 		ARGV = initlist;
 		linesize = initsize;
-		if (*lastarg) {
-			*ARGV++ = addarg(lastarg);
-			lastarg = "";
-		}
+		next = argbuf;
+
+		while (MORE || (lastarg[0] != '\0')) {
+			int l;
+
+			if (*lastarg != '\0') {
+				arg = strcpy(next, lastarg);
+				*lastarg = '\0';
+			} else if ((arg = getarg(next)) == NULL) {
+				break;
+			}
+
+			l = strlen(arg) + 1;
+			linesize += l;
+			next += l;
 
-		while (((*ARGV++ = getarg()) != NULL) && OK) {
+			/* Inserts are handled specially later. */
+			if ((n_inserts == 0) && (linesize >= BUFLIM)) {
+				/*
+				 * LEGAL requests a hard failure when the list
+				 * must be truncated due to size.  Also fail if
+				 * not even one argument fits, since no list
+				 * can be built at all.
+				 */
+				if (LEGAL || N_args == 0) {
+					EMSG(LIST2LONG);
+					exit(2);
+					/* NOTREACHED */
+				}
+
+				/*
+				 * Otherwise just save argument for later.
+				 */
+				(void) strcpy(lastarg, arg);
+				break;
+			}
+
+			*ARGV++ = arg;
+
+			N_args++;
+
+			if ((PER_LINE && N_lines >= PER_LINE) ||
+			    (N_ARGS && (N_args) >= N_ARGS)) {
+				break;
+			}
+
+
 			if ((ARGV - arglist) == MAXARGS) {
-				save_index = ARGV - arglist;
 				break;
 			}
 		}
-		if ((save_index == MAXARGS) && !MORE && (N_args == 0)) {
-			/* there were no more args after filling arglist */
+
+		*ARGV = NULL;
+		if (N_args == 0) {
+			/* Reached the end with no more work. */
 			exit(exitstat);
 		}
 
 		/* insert arg if requested */
 
 		if (!ERR && INSERT) {
-			if ((!MORE) && (N_lines == 0)) {
-				exit(exitstat);
-			}
-					/* no more input lines */
+
 			p_ibuf = ins_buf;
 			ARGV--;
 			j = ibufsize = 0;
@@ -404,31 +451,22 @@
 					break;
 			}
 		}
-		*ARGV = 0;
+		*ARGV = NULL;
 
 		if (n_inserts > 0) {
-			int t_ninserts;
-
 			/*
 			 * if we've done any insertions, re-calculate the
 			 * linesize. bomb out if we've exceeded our length.
 			 */
-			t_ninserts = n_inserts;
-			n_inserts = 0;	/* inserts have been done 	*/
-			linesize = 0;	/* recalculate this		*/
-
-			/* for each current argument in the list:	*/
+			linesize = 0;
 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
-				/* recalculate everything.		*/
-				if (checklen(*ARGV) != 0) {
-					if (N_ARGS && (N_args >= N_ARGS)) {
-						N_lines = N_args = 0;
-						OK = FALSE;
-						ERR = TRUE;
-					}
-				}
+				linesize += strlen(*ARGV) + 1;
 			}
-			n_inserts = t_ninserts;
+			if (linesize >= BUFLIM) {
+				EMSG(LIST2LONG);
+				exit(2);
+				/* NOTREACHED */
+			}
 		}
 
 		/* exec command */
@@ -446,334 +484,161 @@
 				 * so if we have a non-zero status here,
 				 * quit immediately.
 				 */
-				if ((exitstat |= lcall(cmdname, arglist)) == 0)
-					continue;
+				exitstat |= lcall(cmdname, arglist);
 			}
 		}
 	}
 
-	(void) lseek(0, file_offset, SEEK_SET);
-	if (OK) {
+	if (OK)
 		return (exitstat);
-	} else {
-		/*
-		 * if exitstat was set, to match XCU4 complience,
-		 * return that value, otherwise, return 1.
-		 */
-		return (exitstat ? exitstat : 1);
-	}
-}
-
-static void
-queue(char *buffer, int len, int where)
-{
-	pio *new, *element;
-
-	if ((new = malloc(sizeof (pio))) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-	new->cur = new->start = buffer;
-	new->length = len;
 
-	if (where == TAIL) {
-		new->next = NULL;
-		if (queued_data == NULL) {
-			queued_data = new;
-		} else {
-			element = queued_data;
-			while (element->next != NULL) {
-				element = element->next;
-			}
-			element->next = new;
-		}
-	} else {
-		file_offset -= len;
-		new->next = queued_data;
-		queued_data = new;
-	}
-}
-
-static char *
-checklen(char *arg)
-{
-	int	oklen;
-
-	oklen = TRUE;
-	linesize += strlen(arg) + 1;
-	if (linesize >= BUFLIM) {
-		/*
-		 * we skip this if there're inserts. we'll handle the
-		 * argument counting after all the insertions have
-		 * been done.
-		 */
-		if (n_inserts == 0) {
-			lastarg = arg;
-			oklen = OK = FALSE;
-
-			if (LEGAL) {
-				ERR = TRUE;
-				ermsg(gettext("arg list too long\n"));
-			} else if (N_args > 1) {
-				N_args = 1;
-			} else {
-				ermsg(gettext("a single arg was greater than "
-				    "the max arglist size of %d characters\n"),
-				    BUFLIM);
-				ERR = TRUE;
-			}
-		}
-	}
-	return (oklen ? arg : 0);
+	/*
+	 * if exitstat was set, to match XCU4 compliance,
+	 * return that value, otherwise, return 1.
+	 */
+	return (exitstat ? exitstat : 1);
 }
 
 static char *
 addarg(char *arg)
 {
-	if (checklen(arg) != 0) {
-		(void) strcpy(next, arg);
-		arg = next;
-		next += strlen(arg) + 1;
-		return (arg);
-	}
-	return ((char *)0);
+	linesize += (strlen(arg) + 1);
+	return (arg);
 }
 
-/*
- * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
- *
- *     Given a pointer to the beginning of a string buffer, the length of the
- *     buffer and an offset indicating the next place to write within that
- *     buffer, the passed wchar_t will be appended to the buffer if there is
- *     enough space. If there is not enough space, an attempt to reallocate the
- *     buffer will be made and if successful the passed pointer and size will be
- *     updated to describe the reallocated block. Returns the new value for
- *     'offset' (it will be incremented by the number of bytes written).
- */
-static size_t
-store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
-{
-	int bytes;
 
-	/*
-	 * Make sure that there is enough room in the buffer to store the
-	 * maximum length of c.
-	 */
-	if ((offset + MB_CUR_MAX) > *buflen) {
-		/*
-		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
-		 * buffer length to ensure that there is always enough room to
-		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
-		 * defined as.
-		 */
-		*buflen += (QBUF_INC + MB_CUR_MAX);
-		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
-			perror(gettext("xargs: Memory allocation failure"));
-			exit(1);
-		}
-	}
-	/* store bytes from wchar into buffer */
-	bytes = wctomb(*buffer + offset, c);
-	if (bytes == -1) {
-		/* char was invalid */
-		bytes = 1;
-		*(*buffer + offset) = (char)c;
-	}
+static void
+store_str(char **buffer, char *str, size_t len)
+{
+	(void) memcpy(*buffer, str, len);
+	(*buffer)[len] = '\0';
+	*buffer += len;
+}
 
-	/* return new value for offset */
-	return (offset + bytes);
-}
 
 static char *
-getarg()
+getarg(char *arg)
 {
-	int	bytes;
+	char	*xarg = arg;
 	wchar_t	c;
-	char	*arg;
-	char	*retarg, *requeue_buf;
-	size_t  requeue_offset = 0, requeue_len;
 	char	mbc[MB_LEN_MAX];
-
-	while (iswspace(c = getwchr()) || c == '\n')
-		;
-
-	if (c == '\0') {
-		MORE = FALSE;
-		return (0);
-	}
-
-	/*
-	 * While we are reading in an argument, it is possible that we will
-	 * reach the maximum length of the overflow buffer and we'll have to
-	 * requeue what we have read so far. To handle this we allocate an
-	 * initial buffer here which will keep an unprocessed copy of the data
-	 * that we read in (this buffer will grow as required).
-	 */
-	requeue_len = (size_t)QBUF_STARTLEN;
-	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-
-	for (arg = next; ; c = getwchr()) {
-		bytes = wctomb(mbc, c);
+	size_t	len;
+	int	escape = 0;
+	int	inquote = 0;
 
-		/*
-		 * Store the char that we have read before processing it in case
-		 * the current argument needs to be requeued.
-		 */
-		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
-		    requeue_offset, c);
+	arg[0] = '\0';
+
+	while (MORE) {
 
-		/* Check for overflow the input buffer */
-		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
-			/*
-			 * It's only an error if there are no Args in buffer
-			 * already.
-			 */
-			if ((N_ARGS || PER_LINE) && LEGAL) {
-				ERR = TRUE;
-				ermsg(gettext("Argument list too long\n"));
-				free(requeue_buf);
-				return (0);
-			} else if (N_args == 0) {
-				lastarg = "";
-				ERR = TRUE;
-				ermsg(gettext("A single arg was greater than "
-				    "the max arglist size of %d characters\n"),
-				    BUFSIZE);
-				free(requeue_buf);
-				return (0);
-			}
-			/*
-			 * Otherwise we put back the current argument
-			 * and use what we have collected so far...
-			 */
-			queue(requeue_buf, requeue_offset, HEAD);
-			/* reset inquote because we have requeued the quotes */
-			inquote = 0;
+		len = 0;
+		c = getwchr(mbc, &len);
+
+		if (((arg - xarg) + len) > BUFLIM) {
+			EMSG2(ARG2LONG, BUFLIM);
+			exit(2);
+			ERR = TRUE;
 			return (NULL);
 		}
 
-
-		if (iswctype(c, blank) && inquote == 0) {
-			if (INSERT) {
-				if (bytes == -1) {
-					*next++ = (char)c;
-				} else {
-					(void) wctomb(next, c);
-					next += bytes;
-				}
+		switch (c) {
+		case '\n':
+			if (ZERO) {
+				store_str(&arg, mbc, len);
 				continue;
 			}
+			/* FALLTHRU */
 
-			/* skip over trailing whitespace till next arg */
-			while (iswctype((c = getwchr()), blank) &&
-			    (c != '\n') && (c != '\0'))
-				;
+		case '\0':
+		case WEOF:	/* Note WEOF == EOF */
 
-			/*
-			 * if there was space till end of line then the last
-			 * character was really a newline...
-			 */
-			if (c == L'\n' || c == L'\0') {
-				ungetwchr(L'\n');
-			} else {
-				/* later code needs to know this was a space */
-				ungetwchr(c);
-				c = L' ';
+			if (escape) {
+				EMSG(BADESCAPE);
+				ERR = TRUE;
+				return (NULL);
 			}
-			goto end_arg;
-		}
-		switch (c) {
-		case L'\0':
-		case L'\n':
 			if (inquote) {
-				*next++ = '\0';
-				ermsg(gettext("Missing quote: %s\n"), arg);
+				EMSG(MISSQUOTE);
 				ERR = TRUE;
-				free(requeue_buf);
-				return (0);
+				return (NULL);
 			}
 
 			N_lines++;
-end_arg:		*next++ = '\0';
-			/* we finished without requeuing so free requeue_buf */
-			free(requeue_buf);
-			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
-			    (c == '\0' && strlen(arg) == 0)) {
-				MORE = FALSE;
-				/* absorb the rest of the line */
-				if ((c != '\n') && (c != '\0'))
-					while (c = getwchr())
-						if ((c == '\n') || (c == '\0'))
-							break;
-				if (strcmp(arg, LEOF) == 0 && *LEOF != '\0') {
-					/*
-					 * Encountered EOF string.
-					 * Don't read any more lines.
-					 */
-					N_lines = 0;
-				}
-				return (0);
-			} else {
-				++N_args;
-				if (retarg = checklen(arg)) {
-					if ((PER_LINE &&
-					    N_lines >= PER_LINE &&
-					    (c == '\0' || c == '\n')) ||
-					    (N_ARGS && N_args >= N_ARGS)) {
-						N_lines = N_args = 0;
-						lastarg = "";
-						OK = FALSE;
-					}
-				}
-				return (retarg);
-			}
+			break;
 
 		case '"':
-			if (inquote == 1)	/* in single quoted string */
-				goto is_default;
-			if (inquote == 2)	/* terminating double quote */
+			if (ZERO || escape || (inquote == 1)) {
+				/* treat it literally */
+				escape = 0;
+				store_str(&arg, mbc, len);
+
+			} else if (inquote == 2) {
+				/* terminating double quote */
 				inquote = 0;
-			else			/* starting quoted string */
+
+			} else {
+				/* starting quoted string */
 				inquote = 2;
-			break;
+			}
+			continue;
 
 		case '\'':
-			if (inquote == 2)	/* in double quoted string */
-				goto is_default;
-			if (inquote == 1)	/* terminating single quote */
+			if (ZERO || escape || (inquote == 2)) {
+				/* treat it literally */
+				escape = 0;
+				store_str(&arg, mbc, len);
+
+			} else if (inquote == 1) {
+				/* terminating single quote */
 				inquote = 0;
-			else			/* starting quoted string */
+
+			} else {
+				/* starting quoted string */
 				inquote = 1;
-			break;
+			}
+			continue;
 
-		case L'\\':
+		case '\\':
 			/*
 			 * Any unquoted character can be escaped by
 			 * preceding it with a backslash.
 			 */
-			if (inquote == 0) {
-				c = getwchr();
-				/* store quoted char for potential requeueing */
-				requeue_offset = store_wchr(&requeue_buf,
-				    &requeue_len, requeue_offset, c);
+			if (ZERO || inquote || escape) {
+				escape = 0;
+				store_str(&arg, mbc, len);
+			} else {
+				escape = 1;
 			}
+			continue;
 
 		default:
-is_default:		if (bytes == -1) {
-				*next++ = (char)c;
-			} else {
-				(void) wctomb(next, c);
-				next += bytes;
+			/* most times we will just want to store it */
+			if (inquote || escape || ZERO || !iswctype(c, blank)) {
+				escape = 0;
+				store_str(&arg, mbc, len);
+				continue;
 			}
+			/* unquoted blank */
 			break;
 		}
+
+		/*
+		 * At this point we are processing a complete argument.
+		 */
+		if (strcmp(xarg, LEOF) == 0 && *LEOF != '\0') {
+			MORE = FALSE;
+			return (NULL);
+		}
+		if (c == WEOF) {
+			MORE = FALSE;
+		}
+		if (xarg[0] == '\0')
+			continue;
+		break;
 	}
+
+	return (xarg[0] == '\0' ? NULL : xarg);
 }
 
-
 /*
  * ermsg():	print out an error message, and indicate failure globally.
  *
@@ -825,15 +690,6 @@
 		return (TRUE);
 	}
 
-	/*
-	 * at this point, there may be unexpected input pending on stdin,
-	 * if one has used the -n flag. this presents a problem, because
-	 * if we simply do a read(), we'll get the extra input, instead
-	 * of our desired y/n input. so, we see if there's any extra
-	 * input, and if there is, then we will store it.
-	 */
-	saveinput();
-
 	(void) write(2, "?...", 4);	/* ask the user for input	*/
 
 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
@@ -873,7 +729,7 @@
 	bufend = &buffer[MAXSBUF];
 
 	while (*++pat) {
-		if (xindex(pat, INSPAT) == 0) {
+		if (strncmp(pat, INSPAT, ipatlen) == 0) {
 			if (pbuf + len >= bufend) {
 				break;
 			} else {
@@ -895,7 +751,7 @@
 		ermsg(gettext("Maximum argument size with insertion via %s's "
 		    "exceeded\n"), INSPAT);
 		ERR = TRUE;
-		return (0);
+		return (NULL);
 	}
 }
 
@@ -908,96 +764,62 @@
 
 	skel = p->p_skel;
 	sub = *ARGV;
-	linesize -= strlen(skel) + 1;
 	newarg = insert(skel, sub);
 	if (ERR)
 		return;
 
-	if (checklen(newarg)) {
-		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
-			ermsg(gettext("Insert buffer overflow\n"));
-			ERR = TRUE;
-		}
-		(void) strcpy(p_ibuf, newarg);
-		*(p->p_ARGV) = p_ibuf;
-		p_ibuf += l;
+	l = strlen(newarg) + 1;
+	if ((ibufsize += l) > MAXIBUF) {
+		EMSG(IBUFOVERFLOW);
+		ERR = TRUE;
 	}
+	(void) strcpy(p_ibuf, newarg);
+	*(p->p_ARGV) = p_ibuf;
+	p_ibuf += l;
 }
 
 
 /*
- * getchr():	get the next character.
+ * getwchr():	get the next wide character.
  * description:
- *	we get the next character from pio.structure, if there's a character
- *	to get. this may happen when we've had to flush stdin=/dev/tty,
- *	but still wanted to preserve the characters for later processing.
- *
- *	otherwise we just get the character from stdin.
+ *	we get the next character from stdin.  This returns WEOF if no
+ *	character is present.  If ZERO is set, it gets a single byte instead
+ *	of a wide character.
  */
-static int
-getchr(void)
+static wint_t
+getwchr(char *mbc, size_t *sz)
 {
-	char	c;
+	size_t		i;
+	int		c;
+	wchar_t		wch;
 
-	do {
-		if (queued_data == NULL) {
-			char	*buffer;
-			int	len;
+	i = 0;
+	while (i < MB_CUR_MAX) {
 
-			if ((buffer = malloc(BUFSIZE)) == NULL) {
-				perror(gettext(
-				    "xargs: Memory allocation failure"));
-				exit(1);
-			}
+		if ((c = fgetc(stdin)) == EOF) {
 
-			if ((len = read(0, buffer, BUFSIZE)) == 0)
-				return (0);
-			if (len == -1) {
-				perror(gettext("xargs: Read failure"));
-				exit(1);
+			if (i == 0) {
+				/* TRUE EOF has been reached */
+				return (WEOF);
 			}
 
-			queue(buffer, len, TAIL);
-		}
-
-		file_offset++;
-		c = *queued_data->cur++;	 /* get the next character */
-		if (--queued_data->length == 0) { /* at the end of buffer? */
-			pio	*nxt = queued_data->next;
-
-			free(queued_data->start);
-			free(queued_data);
-			queued_data = nxt;
-		}
-	} while (c == '\0');
-	return (c);
-}
-
-
-static wchar_t
-getwchr(void)
-{
-	int		i;
-	wchar_t		wch;
-	unsigned char	buffer[MB_LEN_MAX + 1];
-
-	for (i = 0; i < (int)MB_CUR_MAX; ) {
-		if ((buffer[i++] = getchr()) == NULL) {
-			/* We have reached  EOF */
-			if (i == 1) {
-				/* TRUE EOF has been reached */
-				return (NULL);
-			}
 			/*
 			 * We have some characters in our buffer still so it
 			 * must be an invalid character right before EOF.
 			 */
 			break;
 		}
+		mbc[i++] = (char)c;
 
 		/* If this succeeds then we are done */
-		if (mbtowc(&wch, (char *)buffer, i) != -1)
-			return (wch);
+		if (ZERO) {
+			*sz = i;
+			return ((char)c);
+		}
+		if (mbtowc(&wch, mbc, i) != -1) {
+			*sz = i;
+			return ((wint_t)wch);
+		}
 	}
 
 	/*
@@ -1009,67 +831,50 @@
 	 * generated in another locale?
 	 */
 	errno = EILSEQ;
-	perror(gettext("xargs: Corrupt input file"));
+	PERR(CORRUPTFILE);
 	exit(1);
 	/* NOTREACHED */
 }
 
 
-static void
-ungetwchr(wchar_t wch)
-{
-	char	*buffer;
-	int	bytes;
-
-	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-	bytes = wctomb(buffer, wch);
-	queue(buffer, bytes, HEAD);
-}
-
-
 static int
 lcall(char *sub, char **subargs)
 {
 	int retcode, retry = 0;
 	pid_t iwait, child;
 
-	for (; ; ) {
+	for (;;) {
 		switch (child = fork()) {
 		default:
 			while ((iwait = wait(&retcode)) != child &&
 			    iwait != (pid_t)-1)
 				;
 			if (iwait == (pid_t)-1) {
-				perror(gettext("xargs: Wait failure"));
+				PERR(WAITFAIL);
 				exit(122);
 				/* NOTREACHED */
 			}
 			if (WIFSIGNALED(retcode)) {
-				ermsg(gettext("Child killed with signal %d\n"),
-				    WTERMSIG(retcode));
+				EMSG2(CHILDSIG, WTERMSIG(retcode));
 				exit(125);
 				/* NOTREACHED */
 			}
 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
-				ermsg(gettext("Command could not continue "
-				    "processing data\n"));
+				EMSG(CHILDFAIL);
 				exit(124);
 				/* NOTREACHED */
 			}
 			return (WEXITSTATUS(retcode));
 		case 0:
 			(void) execvp(sub, subargs);
-			perror(gettext("xargs: Could not exec command"));
+			PERR(EXECFAIL);
 			if (errno == EACCES)
 				exit(126);
 			exit(127);
 			/* NOTREACHED */
 		case -1:
 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
-				perror(gettext("xargs: Could not fork child"));
+				PERR(FORKFAIL);
 				exit(123);
 			}
 			(void) sleep(1);
@@ -1078,41 +883,10 @@
 }
 
 
-/*
- * If `s2' is a substring of `s1' return the offset of the first
- * occurrence of `s2' in `s1', else return -1.
- */
-static int
-xindex(char *as1, char *as2)
-{
-	char	*s1, *s2, c;
-	int		offset;
-
-	s1 = as1;
-	s2 = as2;
-	c = *s2;
-
-	while (*s1) {
-		if (*s1++ == c) {
-			offset = s1 - as1 - 1;
-			s2++;
-			while ((c = *s2++) == *s1++ && c)
-				;
-			if (c == 0)
-				return (offset);
-			s1 = offset + as1 + 1;
-			s2 = as2;
-			c = *s2;
-		}
-	}
-	return (-1);
-}
-
-
 static void
 usage()
 {
-	ermsg(gettext(USAGEMSG));
+	ermsg(_(USAGEMSG));
 	OK = FALSE;
 }
 
@@ -1142,14 +916,14 @@
 	int cflag;		/* 0 = not processing cmd arg		*/
 
 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
+		PERR(MALLOCFAIL);
 		exit(1);
 	}
 
 	/* for each argument, see if we need to change things:		*/
 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
 		if ((mav[mac] = strdup(av[i])) == NULL) {
-			perror(gettext("xargs: Memory allocation failure"));
+			PERR(MALLOCFAIL);
 			exit(1);
 		}
 
@@ -1187,8 +961,7 @@
 					mav[++mac] = strdup(&av[i][2]);
 				}
 				if (mav[mac] == NULL) {
-					perror(gettext("xargs: Memory"
-					    " allocation failure"));
+					PERR(MALLOCFAIL);
 					exit(1);
 				}
 				break;
@@ -1225,8 +998,7 @@
 					++mac;	/* inc to next mod'd arg */
 					if ((mav[mac] = strdup(&av[i][2])) ==
 					    NULL) {
-						perror(gettext("xargs: Memory"
-						    " allocation failure"));
+						PERR(MALLOCFAIL);
 						exit(1);
 					}
 					break;
@@ -1239,8 +1011,7 @@
 					return;
 				}
 				if ((mav[mac] = strdup(av[i])) == NULL) {
-					perror(gettext("xargs: Memory"
-					    " allocation failure"));
+					PERR(MALLOCFAIL);
 					exit(1);
 				}
 				break;
@@ -1273,63 +1044,3 @@
 
 	mav[mac] = NULL;
 }
-
-
-/*
- * saveinput(): pick up any pending input, so it can be processed later.
- *
- * description:
- *	the purpose of this routine is to allow us to handle the user
- *	typing in a 'y' or 'n', when there's existing characters already
- *	in stdin. this happens when one gives the "-n" option along with
- *	"-p". the problem occurs when the user first types in more arguments
- *	than specified by the -n number. echoargs() wants to read stdin
- *	in order to get the user's response, but if there's already stuff
- *	there, echoargs() won't read the proper character.
- *
- *	the solution provided by this routine is to pick up all characters
- *	(if any), and store them for later processing.
- */
-
-void
-saveinput()
-{
-	char *buffer;		/* ptr to the floating data buffer	*/
-	struct strpeek speek;	/* to see what's on the queue		*/
-	struct strpeek *ps;
-
-	/* if we're not in -p mode, skip				*/
-	if (PROMPT == -1) {
-		return;
-	}
-
-
-	/* now see if there's any activity pending:			*/
-	ps = &speek;
-	ps->ctlbuf.maxlen = 0;
-	ps->ctlbuf.len = 0;
-	ps->ctlbuf.buf = NULL;
-	ps->flags = 0;
-	ps->databuf.maxlen = MAX_INPUT;
-	ps->databuf.len = 0;
-	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
-		perror(gettext("xargs: Memory allocation failure"));
-		exit(1);
-	}
-	ps->databuf.buf = (char *)buffer;
-
-	if (ioctl(PROMPT, I_PEEK, ps) == -1) {
-		perror(gettext("xargs: I_PEEK failure"));
-		exit(1);
-	}
-
-	if (ps->databuf.len > 0) {
-		int	len;
-
-		if ((len = read(PROMPT, buffer, ps->databuf.len)) == -1) {
-			perror(gettext("xargs: read failure"));
-			exit(1);
-		}
-		queue(buffer, len, TAIL);
-	}
-}