Mercurial > illumos > onarm
view usr/src/cmd/join/join.c @ 4:1a15d5aaf794
synchronized with onnv_86 (6202) in onnv-gate
author | Koji Uno <koji.uno@sun.com> |
---|---|
date | Mon, 31 Aug 2009 14:38:03 +0900 |
parents | c9caec207d52 |
children |
line wrap: on
line source
/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* join F1 F2 on stuff */ #include <stdio.h> #include <locale.h> #include <stdlib.h> #include <widec.h> #include <string.h> #include <limits.h> #include <malloc.h> #include <errno.h> #include <unistd.h> static void output(int, int); static int cmp(wchar_t *, wchar_t *); static int input(int); static void Usage(); static void init_buf(); static void get_buf(int, int); static void get_ppi(int); static void get_olist(int); wchar_t *wcsrchr(const wchar_t *, wchar_t); size_t wcslen(const wchar_t *); #define JOINFIELD 9999 #define F1 0 #define F2 1 #define NFLD LINE_MAX/2 /* max field per line */ #define NOFLD (2*NFLD) /* number arguments to -o */ #define comp() cmp(ppi[F1][j1], ppi[F2][j2]) #define get1() n1 = input(F1) #define get2() n2 = input(F2) #define putfield(string) \ if (string == (wchar_t *)NULL) \ (void) fputs(null, stdout); \ else if (*string == (wchar_t)NULL) \ (void) fputs(null, stdout); \ else \ (void) fputws(string, stdout) #define max(a, b) (a >= b ? a : b) int CNFLD; /* current number of fields per line */ int CNOFLD; /* current number of output fields */ int CBUFSIZE[2]; /* current size of input buffers */ static FILE *f[2]; static wchar_t *buf[2]; /* input lines */ static wchar_t **ppi[2]; /* pointers to fields in lines */ static int j1 = 1; /* join of this field of file 1 */ static int j2 = 1; /* join of this field of file 2 */ static int *olist; /* output these fields */ static int *olistf; /* from these files */ static int no = 0; /* number of entries in olist */ static wchar_t sep1 = L' '; /* default field separator */ static wchar_t sep2 = L'\t'; static char *null = ""; static int aflg = 0; static int vflg = 0; static int tflg = 0; int main(int argc, char *argv[]) { int i, j; int n1, n2; off_t top2, bot2; int cmpresult; int opt, filenumber; int nextargopt; size_t t; char buffer1[BUFSIZ]; char tmpfile[20]; int fd; (void) setlocale(LC_ALL, ""); #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ #endif (void) textdomain(TEXT_DOMAIN); (void) init_buf(); /* allocate buffers */ /* check for non-standard "-j#" option, last 2 arguments are files */ for (i = 1; i < (argc - 2); i++) { if (strcmp(argv[i], "--") == 0) break; if (strcmp(argv[i], "-j1") == 0) { if (i == argc - 3) Usage(); j1 = (int)strtol(argv[i+1], (char **)NULL, 10); for (j = i; j < argc - 1; j++) { argv[j] = argv[j+2]; } argc -= 2; i--; } else if (strcmp(argv[i], "-j2") == 0) { if (i >= argc - 3) Usage(); j2 = (int)strtol(argv[i+1], (char **)NULL, 10); for (j = i; j < argc - 1; j++) { argv[j] = argv[j+2]; } argc -= 2; i--; } } /* check for non-standard "-o" option, last 2 arguments are files */ for (i = 1; i < argc - 2; i++) { /* stop looking for -o if "--" */ if (strcmp(argv[i], "--") == 0) break; if (strncmp(argv[i], "-o", 2) == 0) { if (argv[i][2] == '\0') { for (j = i; j < argc; j++) { argv[j] = argv[j+1]; } argc--; } else { argv[i] += 2 + (int)strspn(&argv[i][2], " "); } if (i == argc - 2) Usage(); for (no = 0; (no < 2*CNFLD) && (i < argc - 2); no++) { if (no > CNOFLD - 1) { /* Get larger table for outlists */ /* (CNOFLD is updated.) */ get_olist(CNOFLD + NOFLD); } if (strncmp(argv[i], "1.", 2) == 0) { olistf[no] = F1; olist[no] = (int)strtol(&argv[i][2], (char **)NULL, 10); nextargopt = 2 + (int) strspn(&argv[i][2], "0123456789"); } else if (strncmp(argv[i], "2.", 2) == 0) { olistf[no] = F2; olist[no] = (int)strtol(&argv[i][2], (char **)NULL, 10); nextargopt = 2 + (int) strspn(&argv[i][2], "0123456789"); } else if (argv[i][0] == '0') { olistf[no] = JOINFIELD; nextargopt = 1; } else break; if (olist[no] > CNFLD) get_ppi(olist[no]); if (argv[i][nextargopt] == '\0') { for (j = i; j < argc; j++) { argv[j] = argv[j+1]; } argc--; } else if ((argv[i][nextargopt] == ' ') || (argv[i][nextargopt] == ',')) { argv[i] += nextargopt + 1; } else Usage(); } } } /* get other options */ while ((opt = getopt(argc, argv, "1:2:a:e:j:t:v:")) != EOF) { switch (opt) { case '1': j1 = (int)strtol(optarg, (char **)NULL, 10); break; case '2': j2 = (int)strtol(optarg, (char **)NULL, 10); break; case 'a': case 'v': filenumber = (int)strtol(optarg, (char **)NULL, 10); if ((filenumber != 1) && (filenumber != 2)) Usage(); aflg |= filenumber; if (opt == 'v') vflg |= filenumber; break; case 'e': null = optarg; break; case 'j': j1 = j2 = (int)strtol(optarg, (char **)NULL, 10); break; case 't': tflg = 1; (void) mbtowc(&sep1, optarg, MB_CUR_MAX); sep2 = sep1; break; } } if (j1 > CNFLD || j2 > CNFLD) get_ppi(max(j1, j2)); /* check options */ if ((j1 > CNFLD) || (j2 > CNFLD) || (j1 <= 0) || (j2 <= 0)) { (void) fprintf(stderr, gettext( "join: bad join field number\n")); Usage(); } /* 0 origin */ for (i = 0; i < no; i++) olist[i]--; j1--; j2--; if (argc - optind != 2) Usage(); if (strcmp(argv[optind], "-") == 0) f[F1] = stdin; else if ((f[F1] = fopen(argv[optind], "r")) == NULL) { perror(argv[optind]); exit(1); } if (strcmp(argv[optind+1], "-") == 0) { (void) strncpy(tmpfile, "/tmp/joinXXXXXX", 20); if ((fd = mkstemp(tmpfile)) < 0) { perror(gettext("join: cannot create tmpfile")); exit(1); } if ((f[F2] = fdopen(fd, "w+")) == NULL) { perror(gettext("join: cannot open tmpfile")); exit(1); } /* tmpfile is automatically removed on exit */ (void) unlink(tmpfile); while ((t = fread(buffer1, 1, BUFSIZ, stdin)) != 0) { if ((fwrite(buffer1, 1, t, f[F2])) != t) { (void) fprintf(stderr, gettext( "join: cannot write to tmpfile\n")); exit(1); } } (void) fflush(f[F2]); rewind(f[F2]); } else if ((f[F2] = fopen(argv[optind+1], "r")) == NULL) { perror(argv[optind+1]); exit(1); } /* set bottom mark on file2 */ bot2 = ftello(f[F2]); /* input a line from each file */ get1(); get2(); /* while lines in both files or -a|v and lines in ether file */ while (((n1 > 0) && (n2 > 0)) || ((aflg != 0) && ((n1 + n2) > 0))) { /* compare join fields */ cmpresult = comp(); /* joinfiled1 > joinfield2 or file1 exasted */ if (((n1 > 0) && (n2 > 0) && (cmpresult > 0)) || (n1 == 0)) { /* outputing unpaired ? */ if (aflg & 2) output(0, n2); /* mark bottom of file2 */ bot2 = ftello(f[F2]); /* input line from file2 */ get2(); /* joinfiled1 < joinfield2 or file2 exasted */ } else if (((n1 > 0) && (n2 > 0) && (cmpresult < 0)) || (n2 == 0)) { /* outputing unpaired ? */ if (aflg & 1) output(n1, 0); /* input line from file1 */ get1(); /* line1 and line2 not empty and joinfield1 == joinfield2 */ } else { /* for lines in file2 that match join field */ while (n2 > 0 && comp() == 0) { /* if not -v output line */ if (!vflg) output(n1, n2); /* mark top of file2 */ top2 = ftello(f[F2]); /* input line from file2 */ get2(); } /* back to bottom line in file2 */ (void) fseeko(f[F2], (off_t)bot2, SEEK_SET); /* input line form file1 and file2 */ get2(); get1(); for (;;) { /* compare join fields */ cmpresult = comp(); /* * line1 and line2 not empty and * joinfield1 == joinfield2 */ if (n1 > 0 && n2 > 0 && cmpresult == 0) { /* if not -v output line */ if (!vflg) output(n1, n2); /* input line from file2 */ get2(); /* joinfiled1 < joinfield2 or file2 exasted */ } else if (n1 > 0 && n2 > 0 && cmpresult < 0 || n2 == 0) { /* seek to bottom of file2 */ (void) fseeko(f[F2], (off_t)bot2, SEEK_SET); /* input line form file1 and file2 */ get2(); get1(); /* file1 exasted or joinfile1 > joinfield2 */ } else { /* seek to top of file2 */ (void) fseeko(f[F2], (off_t)top2, SEEK_SET); /* set bottom at top */ bot2 = top2; /* input line form file2 */ get2(); break; } } } } return (0); } static int input(int n) /* get input line and split into fields */ { int i, c; wchar_t *bp; wchar_t *tbp; wchar_t **pp; int nread; bp = buf[n]; pp = ppi[n]; errno = 0; if (fgetws(bp, CBUFSIZE[n], f[n]) == (wchar_t *)NULL) { if (errno) { perror("join"); exit(1); } return (0); } else { if (errno == EILSEQ) { (void) fprintf(stderr, gettext( "join: invalid char following \"")); (void) fputws(bp, stderr); (void) fprintf(stderr, "\": "); perror(""); exit(1); } else if (errno) { perror("join"); exit(1); } /* Check for long lines */ if (((wchar_t *)wcsrchr((wchar_t *)bp, L'\n') == (wchar_t *)NULL) && (size_t)wcslen((wchar_t *)bp) == (size_t)(CBUFSIZE[n] - 1)) { tbp = bp; nread = CBUFSIZE[n] - 1; while ((wchar_t *)wcsrchr((wchar_t *)tbp, L'\n') == (wchar_t *)NULL) { /* Increase buffer by LINE_MAX */ /* (CBUFSIZE[n] is updated.) */ get_buf(n, CBUFSIZE[n] + LINE_MAX); bp = buf[n]; tbp = buf[n] + nread; /* Read rest of line */ errno = 0; if (fgetws(tbp, LINE_MAX + 1, f[n]) == (wchar_t *)NULL) { if (errno) { perror("join"); exit(1); } break; } else if (errno == EILSEQ) { (void) fprintf(stderr, gettext( "join: invalid char following \"")); (void) fputws(bp, stderr); (void) fprintf(stderr, "\": "); perror(""); exit(1); } else if (errno) { perror("join"); exit(1); } nread += (size_t)wcslen((wchar_t *)tbp); } } } bp = buf[n]; i = 0; do { i++; /* Check for overflow of ppi[n] */ if (i > CNFLD - 1) { /* account for 0 termination */ /* Reallocate larger buffers (CNFLD is updated.) */ get_ppi(CNFLD + NFLD); /* Restore local pointer */ pp = &ppi[n][i-1]; } if (!tflg) /* strip multiples */ while ((c = *bp) == sep1 || c == sep2) { bp++; /* skip blanks */ } *pp++ = bp; /* record beginning */ while ((c = *bp) != sep1 && c != L'\n' && c != sep2 && c != L'\0') { bp++; } *bp++ = L'\0'; /* mark end by overwriting blank */ } while ((c != L'\n') && (c != L'\0') && (pp != &ppi[n][CNFLD])); *pp = (wchar_t *)NULL; return (i); } static void output(on1, on2) /* print items from olist */ int on1, on2; { int i; if (no <= 0) { /* default case */ if (on1) putfield(ppi[F1][j1]); else putfield(ppi[F2][j2]); for (i = 0; i < on1; i++) if (i != j1) { (void) putwchar(sep1); putfield(ppi[F1][i]); } for (i = 0; i < on2; i++) if (i != j2) { (void) putwchar(sep1); putfield(ppi[F2][i]); } (void) putwchar(L'\n'); } else { for (i = 0; i < no; i++) { if (olistf[i] == F1 && on1 <= olist[i] || olistf[i] == F2 && on2 <= olist[i]) { (void) fputs(null, stdout); } else if (olistf[i] == JOINFIELD) { if (on1) putfield(ppi[F1][j1]); else putfield(ppi[F2][j2]); } else putfield(ppi[olistf[i]][olist[i]]); if (i < no - 1) #if defined(__lint) /* lint doesn't grok "%wc" */ /* EMPTY */; #else (void) printf("%wc", sep1); #endif else (void) putwchar(L'\n'); } } } static int cmp(s1, s2) wchar_t *s1, *s2; { int rc; if (s1 == (wchar_t *)NULL) { if (s2 == (wchar_t *)NULL) rc = 0; else rc = -1; } else if (s2 == (wchar_t *)NULL) rc = 1; else rc = wscoll(s1, s2); return (rc); } static void Usage() { (void) fprintf(stderr, gettext( "usage: join [-a file_number | -v file_number] [-o list [-e string]]\n" " [-t char] [-1 field] [-2 field] file1 file2\n\n" " join [-a file_number] [-j field] [-j1 field] [-j2 field]\n" " [-o list [-e string]] [-t char] file1 file2\n")); exit(1); } /* * Allocate memory for buffers. */ static void init_buf() { (void) get_buf(F1, LINE_MAX); (void) get_buf(F2, LINE_MAX); (void) get_ppi(NFLD); (void) get_olist(NOFLD); } static void get_ppi(nfld) int nfld; { int i; for (i = 0; i < 2; i++) { if (ppi[i]) { if ((ppi[i] = (wchar_t **)realloc((char *)ppi[i], (unsigned)(nfld * sizeof (wchar_t *)))) == NULL) { (void) fprintf(stderr, gettext("realloc pointer table failed\n")); exit(1); } } else { if ((ppi[i] = (wchar_t **)calloc((unsigned)nfld, (unsigned)sizeof (wchar_t *))) == NULL) { (void) fprintf(stderr, gettext("calloc pointer table failed\n")); exit(1); } } } CNFLD = nfld; } static void get_buf(i, size) int i; int size; { if (buf[i]) { if ((buf[i] = (wchar_t *) realloc(buf[i], (unsigned)(size * sizeof (wchar_t)))) == NULL) { (void) fprintf(stderr, gettext("realloc input buffer failed\n")); exit(1); } } else { if ((buf[i] = (wchar_t *) calloc((unsigned)size, (unsigned)sizeof (wchar_t))) == NULL) { (void) fprintf(stderr, gettext("calloc input buffer failed\n")); exit(1); } } CBUFSIZE[i] = size; } static void get_olist(onfld) int onfld; { if (olist) { if ((olist = (int *)realloc((wchar_t *)olist, (unsigned)(onfld * sizeof (int)))) == NULL) { (void) fprintf(stderr, gettext("realloc olist failed\n")); exit(1); } } else { if ((olist = (int *) calloc((unsigned)onfld, (unsigned)sizeof (int))) == NULL) { (void) fprintf(stderr, gettext("calloc olist failed\n")); exit(1); } } if (olistf) { if ((olistf = (int *)realloc((wchar_t *)olistf, (unsigned)(onfld * sizeof (int)))) == NULL) { (void) fprintf(stderr, gettext("realloc olistf failed\n")); exit(1); } } else { if ((olistf = (int *) calloc((unsigned)onfld, (unsigned)sizeof (int))) == NULL) { (void) fprintf(stderr, gettext("calloc olistf failed\n")); exit(1); } } CNOFLD = onfld; }