commit b987143eb6c6c61a63323e4aec17fe1cc1465576
parent 16168840c3172c0f3297452652c5002f995aca3b
Author: NunoSempere <nuno.sempere@protonmail.com>
Date: Mon, 11 Sep 2023 16:34:53 +0200
add historical versions
Diffstat:
5 files changed, 1847 insertions(+), 0 deletions(-)
diff --git a/historical/busybox-wc.c b/historical/busybox-wc.c
@@ -0,0 +1,257 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * wc implementation for busybox
+ *
+ * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org>
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
+ *
+ * Rewritten to fix a number of problems and do some size optimizations.
+ * Problems in the previous busybox implementation (besides bloat) included:
+ * 1) broken 'wc -c' optimization (read note below)
+ * 2) broken handling of '-' args
+ * 3) no checking of ferror on EOF returns
+ * 4) isprint() wasn't considered when word counting.
+ *
+ * NOTES:
+ *
+ * The previous busybox wc attempted an optimization using stat for the
+ * case of counting chars only. I omitted that because it was broken.
+ * It didn't take into account the possibility of input coming from a
+ * pipe, or input from a file with file pointer not at the beginning.
+ *
+ * To implement such a speed optimization correctly, not only do you
+ * need the size, but also the file position. Note also that the
+ * file position may be past the end of file. Consider the example
+ * (adapted from example in gnu wc.c)
+ *
+ * echo hello > /tmp/testfile &&
+ * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
+ *
+ * for which 'wc -c' should output '0'.
+ */
+//config:config WC
+//config: bool "wc (4.7 kb)"
+//config: default y
+//config: help
+//config: wc is used to print the number of bytes, words, and lines,
+//config: in specified files.
+//config:
+//config:config FEATURE_WC_LARGE
+//config: bool "Support very large counts"
+//config: default y
+//config: depends on WC
+//config: help
+//config: Use "unsigned long long" for counter variables.
+
+//applet:IF_WC(APPLET(wc, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_WC) += wc.o
+
+/* BB_AUDIT SUSv3 compliant. */
+/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
+
+#include "libbb.h"
+#include "unicode.h"
+
+/* Without locale support, replace the ctype macros with ASCII-only
+ * versions: isprint() accepts 0x20..0x7e, isspace() matches only ' '.
+ */
+#if !ENABLE_LOCALE_SUPPORT
+# undef isprint
+# undef isspace
+# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20))
+# define isspace(c) ((c) == ' ')
+#endif
+
+/* COUNT_T is the type of every counter; COUNT_FMT is the matching printf
+ * conversion without the leading '%' (it is pasted after "%9" below).
+ * "unsigned long long" is used only when FEATURE_WC_LARGE is enabled.
+ */
+#if ENABLE_FEATURE_WC_LARGE
+# define COUNT_T unsigned long long
+# define COUNT_FMT "llu"
+#else
+# define COUNT_T unsigned
+# define COUNT_FMT "u"
+#endif
+
+/* We support -m even when UNICODE_SUPPORT is off,
+ * we just don't advertise it in help text,
+ * since it is the same as -c in this case.
+ */
+
+//usage:#define wc_trivial_usage
+//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
+//usage:
+//usage:#define wc_full_usage "\n\n"
+//usage: "Count lines, words, and bytes for FILEs (or stdin)\n"
+//usage: "\n -c Count bytes"
+//usage: IF_UNICODE_SUPPORT(
+//usage: "\n -m Count characters"
+//usage: )
+//usage: "\n -l Count newlines"
+//usage: "\n -w Count words"
+//usage: "\n -L Print longest line length"
+//usage:
+//usage:#define wc_example_usage
+//usage: "$ wc /etc/passwd\n"
+//usage: " 31 46 1365 /etc/passwd\n"
+
+/* Order is important if we want to be compatible with
+ * column order in "wc -cmlwL" output.
+ * Each value is used both as an index into the counts[]/totals[] arrays
+ * and as the bit position of the corresponding option in print_type.
+ */
+enum {
+ WC_LINES = 0, /* -l */
+ WC_WORDS = 1, /* -w */
+ WC_UNICHARS = 2, /* -m */
+ WC_BYTES = 3, /* -c */
+ WC_LENGTH = 4, /* -L */
+ NUM_WCS = 5, /* number of counters, i.e. array size */
+};
+
+/* Applet entry point.
+ * For every name left in argv after option parsing (or for
+ * bb_msg_standard_input when there is none) count newlines, words,
+ * characters, bytes and the longest line length, then print the columns
+ * selected by -l/-w/-m/-c/-L. When more than one name was processed a
+ * final "total" line is printed as well.
+ * Does not return normally: it leaves through fflush_stdout_and_exit()
+ * with EXIT_FAILURE if any file failed to open or hit a read error,
+ * and EXIT_SUCCESS otherwise.
+ */
+int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int wc_main(int argc UNUSED_PARAM, char **argv)
+{
+ const char *arg;
+ /* "+ 1" skips the leading space: the first column has no separator */
+ const char *start_fmt = " %9"COUNT_FMT + 1;
+ const char *fname_fmt = " %s\n";
+ COUNT_T *pcounts;
+ COUNT_T counts[NUM_WCS];
+ COUNT_T totals[NUM_WCS];
+ int num_files;
+ smallint status = EXIT_SUCCESS;
+ unsigned print_type;
+
+ init_unicode();
+
+ print_type = getopt32(argv, "lwmcL");
+
+ /* No option given: default to lines, words and bytes */
+ if (print_type == 0) {
+ print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES);
+ }
+
+ argv += optind;
+ if (!argv[0]) {
+ /* No file operands: reuse the slot just before them for the
+ * standard input name, and print no name after the counts */
+ *--argv = (char *) bb_msg_standard_input;
+ fname_fmt = "\n";
+ }
+ if (!argv[1]) { /* zero or one filename? */
+ /* x & (x-1) is zero only when a single bit is set */
+ if (!((print_type-1) & print_type)) /* exactly one option? */
+ start_fmt = "%"COUNT_FMT;
+ }
+
+ memset(totals, 0, sizeof(totals));
+
+ pcounts = counts;
+
+ num_files = 0;
+ while ((arg = *argv++) != NULL) {
+ FILE *fp;
+ const char *s;
+ unsigned u;
+ unsigned linepos;
+ smallint in_word;
+
+ ++num_files;
+ fp = fopen_or_warn_stdin(arg);
+ if (!fp) {
+ status = EXIT_FAILURE;
+ continue;
+ }
+
+ memset(counts, 0, sizeof(counts));
+ linepos = 0;
+ in_word = 0;
+
+ while (1) {
+ int c;
+ /* Our -w doesn't match GNU wc exactly... oh well */
+
+ c = getc(fp);
+ if (c == EOF) {
+ if (ferror(fp)) {
+ bb_simple_perror_msg(arg);
+ status = EXIT_FAILURE;
+ }
+ /* Jump into the line-terminator branch below so that the
+ * last line length and a trailing word are accounted for */
+ goto DO_EOF; /* Treat an EOF as '\r'. */
+ }
+
+ /* Cater for -c and -m */
+ ++counts[WC_BYTES];
+ if (unicode_status != UNICODE_ON /* every byte is a new char */
+ || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
+ ) {
+ ++counts[WC_UNICHARS];
+ }
+
+ if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
+ ++linepos;
+ if (!isspace(c)) {
+ in_word = 1;
+ continue;
+ }
+ /* a printable space falls through to end the current word */
+ } else if ((unsigned)(c - 9) <= 4) {
+ /* \t 9
+ * \n 10
+ * \v 11
+ * \f 12
+ * \r 13
+ */
+ if (c == '\t') {
+ /* advance to the next multiple of 8 */
+ linepos = (linepos | 7) + 1;
+ } else { /* '\n', '\r', '\f', or '\v' */
+ DO_EOF:
+ if (linepos > counts[WC_LENGTH]) {
+ counts[WC_LENGTH] = linepos;
+ }
+ if (c == '\n') {
+ ++counts[WC_LINES];
+ }
+ /* '\v' is the only one that keeps the column */
+ if (c != '\v') {
+ linepos = 0;
+ }
+ }
+ } else {
+ /* any other byte neither extends nor ends a word */
+ continue;
+ }
+
+ /* Reached only for separators and EOF: close the pending word */
+ counts[WC_WORDS] += in_word;
+ in_word = 0;
+ if (c == EOF) {
+ break;
+ }
+ }
+
+ fclose_if_not_stdin(fp);
+
+ /* The output loop below adds counts[] to totals[] for every column.
+ * For WC_LENGTH we want a maximum instead of a sum, so store the
+ * maximum here and pre-subtract what that loop is about to add. */
+ if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
+ totals[WC_LENGTH] = counts[WC_LENGTH];
+ }
+ totals[WC_LENGTH] -= counts[WC_LENGTH];
+
+ OUTPUT:
+ /* coreutils wc tries hard to print pretty columns
+ * (saves results for all files, finds max col len etc...)
+ * we won't try that hard, it will bloat us too much */
+ s = start_fmt;
+ u = 0;
+ do {
+ if (print_type & (1 << u)) {
+ printf(s, pcounts[u]);
+ s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
+ }
+ totals[u] += pcounts[u];
+ } while (++u < NUM_WCS);
+ printf(fname_fmt, arg);
+ }
+
+ /* If more than one file was processed, we want the totals. To save some
+ * space, we set the pcounts ptr to the totals array. This has the side
+ * effect of trashing the totals array after outputting it, but that's
+ * irrelevant since we no longer need it. */
+ if (num_files > 1) {
+ num_files = 0; /* Make sure we don't get here again. */
+ arg = "total";
+ pcounts = totals;
+ /* Step back onto the terminating NULL so that the while condition
+ * ends the loop right after the totals line has been printed */
+ --argv;
+ goto OUTPUT;
+ }
+
+ fflush_stdout_and_exit(status);
+}
diff --git a/historical/gnu-wc.c b/historical/gnu-wc.c
@@ -0,0 +1,1034 @@
+/* wc - print the number of lines, words, and bytes in files
+ Copyright (C) 1985-2023 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Paul Rubin, phr@ocf.berkeley.edu
+ and David MacKenzie, djm@gnu.ai.mit.edu. */
+
+#include <config.h>
+
+#include <stdckdint.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "system.h"
+#include "assure.h"
+#include "argmatch.h"
+#include "argv-iter.h"
+#include "fadvise.h"
+#include "mbchar.h"
+#include "physmem.h"
+#include "readtokens0.h"
+#include "safe-read.h"
+#include "stat-size.h"
+#include "xbinary-io.h"
+
+/* Fallback for systems without iswspace: a wide character counts as
+ white space only if it is unchanged by to_uchar and isspace is true
+ for that value. */
+#if !defined iswspace && !HAVE_ISWSPACE
+# define iswspace(wc) \
+ ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
+#endif
+
+/* The official name of this program (e.g., no 'g' prefix). */
+#define PROGRAM_NAME "wc"
+
+/* Author list handed to case_GETOPT_VERSION_CHAR in main. */
+#define AUTHORS \
+ proper_name ("Paul Rubin"), \
+ proper_name ("David MacKenzie")
+
+/* Size of atomic reads. The read buffers are declared one byte larger. */
+#define BUFFER_SIZE (16 * 1024)
+
+#ifdef USE_AVX2_WC_LINECOUNT
+/* From wc_avx2.c. Has the same signature as wc_lines, so that wc can
+ call either one through a single function pointer. */
+extern bool
+wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
+ uintmax_t *bytes_out);
+#endif
+
+/* True if --debug was given; enables the AVX2 detection diagnostic. */
+static bool debug;
+
+/* Cumulative number of lines, words, chars and bytes in all files so far.
+ max_line_length is the maximum over all files processed so far.
+ Each total_*_overflow variable is set to true by wc when adding to the
+ corresponding total overflowed; although declared uintmax_t, they are
+ only ever used as boolean flags. */
+static uintmax_t total_lines;
+static uintmax_t total_words;
+static uintmax_t total_chars;
+static uintmax_t total_bytes;
+static uintmax_t total_lines_overflow;
+static uintmax_t total_words_overflow;
+static uintmax_t total_chars_overflow;
+static uintmax_t total_bytes_overflow;
+static uintmax_t max_line_length;
+
+/* Which counts to print. */
+static bool print_lines, print_words, print_chars, print_bytes;
+static bool print_linelength;
+
+/* The print width of each count. */
+static int number_width;
+
+/* True if we have ever read the standard input. */
+static bool have_read_stdin;
+
+/* Used to determine if file size can be determined without reading.
+ Set from getpagesize in main. */
+static size_t page_size;
+
+/* Enable to _not_ treat non breaking space as a word separator.
+ Set in main when the POSIXLY_CORRECT environment variable exists. */
+static bool posixly_correct;
+
+/* The result of calling fstat or stat on a file descriptor or file.
+ get_input_fstatus may leave the call undone (FAILED == 1); wc then
+ calls fstat itself if it needs the size. */
+struct fstatus
+{
+ /* If positive, fstat or stat has not been called yet. Otherwise,
+ this is the value returned from fstat or stat. */
+ int failed;
+
+ /* If FAILED is zero, this is the file's status. */
+ struct stat st;
+};
+
+/* For long options that have no equivalent short option, use a
+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
+enum
+{
+ DEBUG_PROGRAM_OPTION = CHAR_MAX + 1, /* --debug */
+ FILES0_FROM_OPTION, /* --files0-from=F */
+ TOTAL_OPTION, /* --total=WHEN */
+};
+
+/* Long options understood by the getopt_long call in main. Options
+ without a short form map to the pseudo option values defined above. */
+static struct option const longopts[] =
+{
+ {"bytes", no_argument, nullptr, 'c'},
+ {"chars", no_argument, nullptr, 'm'},
+ {"lines", no_argument, nullptr, 'l'},
+ {"words", no_argument, nullptr, 'w'},
+ {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
+ {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
+ {"max-line-length", no_argument, nullptr, 'L'},
+ {"total", required_argument, nullptr, TOTAL_OPTION},
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {nullptr, 0, nullptr, 0}
+};
+
+/* When to print the line with total counts (--total=WHEN).
+ total_args and total_types are parallel tables: XARGMATCH in main maps
+ the option argument found in total_args to the value at the same index
+ of total_types. */
+enum total_type
+ {
+ total_auto, /* 0: default or --total=auto */
+ total_always, /* 1: --total=always */
+ total_only, /* 2: --total=only */
+ total_never /* 3: --total=never */
+ };
+static char const *const total_args[] =
+{
+ "auto", "always", "only", "never", nullptr
+};
+static enum total_type const total_types[] =
+{
+ total_auto, total_always, total_only, total_never
+};
+ARGMATCH_VERIFY (total_args, total_types);
+/* The mode in effect; changed only by the --total option. */
+static enum total_type total_mode = total_auto;
+
+#ifdef USE_AVX2_WC_LINECOUNT
+/* Return true if the compiler builtin reports AVX2 support on the
+   running CPU.  With --debug, also report the outcome through error.  */
+static bool
+avx2_supported (void)
+{
+  bool const have_avx2 = __builtin_cpu_supports ("avx2") > 0;
+
+  if (debug)
+    {
+      if (have_avx2)
+        error (0, 0, _("using avx2 hardware support"));
+      else
+        error (0, 0, _("avx2 support not detected"));
+    }
+
+  return have_avx2;
+}
+#endif
+
+/* Print usage information and exit with STATUS; never returns.
+ For a STATUS other than EXIT_SUCCESS only the "try --help" hint is
+ emitted; otherwise the full help text is written to stdout. */
+void
+usage (int status)
+{
+ if (status != EXIT_SUCCESS)
+ emit_try_help ();
+ else
+ {
+ printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+ or: %s [OPTION]... --files0-from=F\n\
+"),
+ program_name, program_name);
+ fputs (_("\
+Print newline, word, and byte counts for each FILE, and a total line if\n\
+more than one FILE is specified. A word is a non-zero-length sequence of\n\
+printable characters delimited by white space.\n\
+"), stdout);
+
+ emit_stdin_note ();
+
+ fputs (_("\
+\n\
+The options below may be used to select which counts are printed, always in\n\
+the following order: newline, word, character, byte, maximum line length.\n\
+ -c, --bytes print the byte counts\n\
+ -m, --chars print the character counts\n\
+ -l, --lines print the newline counts\n\
+"), stdout);
+ fputs (_("\
+ --files0-from=F read input from the files specified by\n\
+ NUL-terminated names in file F;\n\
+ If F is - then read names from standard input\n\
+ -L, --max-line-length print the maximum display width\n\
+ -w, --words print the word counts\n\
+"), stdout);
+ fputs (_("\
+ --total=WHEN when to print a line with total counts;\n\
+ WHEN can be: auto, always, only, never\n\
+"), stdout);
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
+ emit_ancillary_info (PROGRAM_NAME);
+ }
+ exit (status);
+}
+
+/* Return nonzero if WC is one of the non-breaking space characters
+   U+00A0, U+2007, U+202F or U+2060.  Always return zero when
+   posixly_correct is set, so that they are then not word separators.  */
+ATTRIBUTE_PURE
+static int
+iswnbspace (wint_t wc)
+{
+  if (posixly_correct)
+    return 0;
+
+  switch (wc)
+    {
+    case 0x00A0:
+    case 0x2007:
+    case 0x202F:
+    case 0x2060:
+      return 1;
+
+    default:
+      return 0;
+    }
+}
+
+/* Single-byte counterpart of iswnbspace: widen C with btowc, then
+   apply the same test.  */
+static int
+isnbspace (int c)
+{
+  wint_t const widened = btowc (c);
+
+  return iswnbspace (widened);
+}
+
+/* Print one line of output: each count selected by the print_* flags,
+   right-aligned in NUMBER_WIDTH columns and separated by single spaces,
+   in the order lines, words, chars, bytes, line length.
+   FILE is the name of the file (or null for standard input)
+   associated with the specified counters; when non-null it is appended
+   after the counts, quoted if it contains a newline.  */
+static void
+write_counts (uintmax_t lines,
+              uintmax_t words,
+              uintmax_t chars,
+              uintmax_t bytes,
+              uintmax_t linelength,
+              char const *file)
+{
+  static char const spaced_fmt[] = " %*s";
+  bool const selected[] =
+    { print_lines, print_words, print_chars, print_bytes, print_linelength };
+  uintmax_t const value[] = { lines, words, chars, bytes, linelength };
+  /* The first column printed gets no leading separator.  */
+  char const *fmt = spaced_fmt + 1;
+  char numbuf[INT_BUFSIZE_BOUND (uintmax_t)];
+
+  for (size_t col = 0; col < sizeof value / sizeof value[0]; col++)
+    {
+      if (! selected[col])
+        continue;
+      printf (fmt, number_width, umaxtostr (value[col], numbuf));
+      fmt = spaced_fmt;
+    }
+
+  if (file)
+    {
+      char const *shown = strchr (file, '\n') ? quotef (file) : file;
+      printf (" %s", shown);
+    }
+  putchar ('\n');
+}
+
+/* Count the newlines and bytes that can be read from descriptor FD
+   until end of input, storing the results in *LINES_OUT and *BYTES_OUT.
+   FILE is used only to label a read-error diagnostic.
+   Return true on success.  Return false, leaving the outputs untouched,
+   if either output pointer is null or a read fails.  */
+static bool
+wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
+{
+  /* One spare byte holds the sentinel newline for rawmemchr.  */
+  char buf[BUFFER_SIZE + 1];
+  uintmax_t newline_count = 0;
+  uintmax_t byte_count = 0;
+  bool long_lines = false;
+  size_t got;
+
+  if (! (lines_out && bytes_out))
+    return false;
+
+  for (;;)
+    {
+      got = safe_read (fd, buf, BUFFER_SIZE);
+      if (got == 0)
+        break;
+      if (got == SAFE_READ_ERROR)
+        {
+          error (0, errno, "%s", quotef (file));
+          return false;
+        }
+
+      byte_count += got;
+
+      char *cursor = buf;
+      char *const limit = buf + got;
+      uintmax_t const before = newline_count;
+
+      if (long_lines)
+        {
+          /* rawmemchr is more efficient with longer lines.  The sentinel
+             guarantees that every search terminates.  */
+          *limit = '\n';
+          for (cursor = rawmemchr (cursor, '\n');
+               cursor < limit;
+               cursor = rawmemchr (cursor + 1, '\n'))
+            newline_count++;
+        }
+      else
+        {
+          /* Avoid function call overhead for shorter lines.  */
+          for (; cursor != limit; cursor++)
+            newline_count += *cursor == '\n';
+        }
+
+      /* If the average line length in the block is >= 15, then use
+         memchr for the next block, where system specific optimizations
+         may outweigh function call overhead.
+         FIXME: This line length was determined in 2015, on both
+         x86_64 and ppc64, but it's worth re-evaluating in future with
+         newer compilers, CPUs, or memchr() implementations etc.  */
+      long_lines = newline_count - before <= got / 15;
+    }
+
+  *bytes_out = byte_count;
+  *lines_out = newline_count;
+
+  return true;
+}
+
+/* Count the lines, words, characters and bytes of the input open on
+   descriptor FD, and the length of its longest line, doing only as much
+   work as the selected print_* options require.
+   FILE_X is the name of the file (or null for standard input).
+   *FSTATUS is its status; fstat is called here if the bytes-only path
+   needs it and it has not been called yet.
+   CURRENT_POS is the current file offset if known, negative if unknown.
+   Unless only the total was requested, print the counts with
+   write_counts.  Always add the counts to the running totals, recording
+   any overflow, and update max_line_length.
+   Return true if successful, false if a read error was diagnosed.  */
+static bool
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
+{
+  bool ok = true;
+  char buf[BUFFER_SIZE + 1];
+  size_t bytes_read;
+  uintmax_t lines, words, chars, bytes, linelength;
+  bool count_bytes, count_chars, count_complicated;
+  char const *file = file_x ? file_x : _("standard input");
+
+  lines = words = chars = bytes = linelength = 0;
+
+  /* If in the current locale, chars are equivalent to bytes, we prefer
+     counting bytes, because that's easier.  */
+#if MB_LEN_MAX > 1
+  if (MB_CUR_MAX > 1)
+    {
+      count_bytes = print_bytes;
+      count_chars = print_chars;
+    }
+  else
+#endif
+    {
+      count_bytes = print_bytes || print_chars;
+      count_chars = false;
+    }
+  count_complicated = print_words || print_linelength;
+
+  /* Advise the kernel of our access pattern only if we will read().  */
+  if (!count_bytes || count_chars || print_lines || count_complicated)
+    fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+
+  /* When counting only bytes, save some line- and word-counting
+     overhead.  If FD is a 'regular' Unix file, using lseek is enough
+     to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
+     bytes at a time until EOF.  Note that the 'size' (number of bytes)
+     that wc reports is smaller than stats.st_size when the file is not
+     positioned at its beginning.  That's why the lseek calls below are
+     necessary.  For example the command
+     '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
+     should make wc report '0' bytes.  */
+
+  if (count_bytes && !count_chars && !print_lines && !count_complicated)
+    {
+      bool skip_read = false;
+
+      if (0 < fstatus->failed)
+        fstatus->failed = fstat (fd, &fstatus->st);
+
+      /* For sized files, seek to one st_blksize before EOF rather than to EOF.
+         This works better for files in proc-like file systems where
+         the size is only approximate.  */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
+        {
+          off_t end_pos = fstatus->st.st_size;
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+
+          if (end_pos % page_size)
+            {
+              /* We only need special handling of /proc and /sys files etc.
+                 when they're a multiple of PAGE_SIZE.  In the common case
+                 for files with st_size not a multiple of PAGE_SIZE,
+                 it's more efficient and accurate to use st_size.
+
+                 Be careful here.  The current position may actually be
+                 beyond the end of the file.  As in the example above.  */
+
+              bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+              if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
+                skip_read = true;
+              else
+                bytes = 0;
+            }
+          else
+            {
+              /* HI_POS is an absolute offset, so seek to it with SEEK_SET.
+                 A relative seek would overshoot by CURRENT_POS whenever
+                 the descriptor does not start at offset 0, and the bytes
+                 skipped that way would be missing from the count, since
+                 BYTES below assumes we are positioned exactly at HI_POS.
+                 The remainder of the file is then read normally.  */
+              off_t hi_pos = (end_pos
+                              - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
+              if (0 <= current_pos && current_pos < hi_pos
+                  && 0 <= lseek (fd, hi_pos, SEEK_SET))
+                bytes = hi_pos - current_pos;
+            }
+        }
+
+      if (! skip_read)
+        {
+          fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+          while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+            {
+              if (bytes_read == SAFE_READ_ERROR)
+                {
+                  error (0, errno, "%s", quotef (file));
+                  ok = false;
+                  break;
+                }
+              bytes += bytes_read;
+            }
+        }
+    }
+  else if (!count_chars && !count_complicated)
+    {
+#ifdef USE_AVX2_WC_LINECOUNT
+      static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
+      if (!wc_lines_p)
+        wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
+#else
+      bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
+        = wc_lines;
+#endif
+
+      /* Use a separate loop when counting only lines or lines and bytes --
+         but not chars or words.  */
+      ok = wc_lines_p (file, fd, &lines, &bytes);
+    }
+#if MB_LEN_MAX > 1
+# define SUPPORT_OLD_MBRTOWC 1
+  else if (MB_CUR_MAX > 1)
+    {
+      bool in_word = false;
+      uintmax_t linepos = 0;
+      mbstate_t state = {0};
+      bool in_shift = false;
+# if SUPPORT_OLD_MBRTOWC
+      /* Back-up the state before each multibyte character conversion and
+         move the last incomplete character of the buffer to the front
+         of the buffer.  This is needed because we don't know whether
+         the 'mbrtowc' function updates the state when it returns -2, --
+         this is the ISO C 99 and glibc-2.2 behavior - or not - amended
+         ANSI C, glibc-2.1 and Solaris 5.7 behavior.  We don't have an
+         autoconf test for this, yet.  */
+      size_t prev = 0; /* number of bytes carried over from previous round */
+# else
+      const size_t prev = 0;
+# endif
+
+      while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
+        {
+          char const *p;
+# if SUPPORT_OLD_MBRTOWC
+          mbstate_t backup_state;
+# endif
+          if (bytes_read == SAFE_READ_ERROR)
+            {
+              error (0, errno, "%s", quotef (file));
+              ok = false;
+              break;
+            }
+
+          bytes += bytes_read;
+          p = buf;
+          /* From here on BYTES_READ is the number of unprocessed bytes
+             in BUF, including those carried over.  */
+          bytes_read += prev;
+          do
+            {
+              wchar_t wide_char;
+              size_t n;
+              bool wide = true;
+
+              if (!in_shift && is_basic (*p))
+                {
+                  /* Handle most ASCII characters quickly, without calling
+                     mbrtowc().  */
+                  n = 1;
+                  wide_char = *p;
+                  wide = false;
+                }
+              else
+                {
+                  in_shift = true;
+# if SUPPORT_OLD_MBRTOWC
+                  backup_state = state;
+# endif
+                  n = mbrtowc (&wide_char, p, bytes_read, &state);
+                  if (n == (size_t) -2)
+                    {
+                      /* Incomplete character at the end of the buffer:
+                         stop and carry it over to the next read.  */
+# if SUPPORT_OLD_MBRTOWC
+                      state = backup_state;
+# endif
+                      break;
+                    }
+                  if (n == (size_t) -1)
+                    {
+                      /* Remember that we read a byte, but don't complain
+                         about the error.  Because of the decoding error,
+                         this is considered to be a byte but not a
+                         character (that is, chars is not incremented).  */
+                      p++;
+                      bytes_read--;
+                      continue;
+                    }
+                  if (mbsinit (&state))
+                    in_shift = false;
+                  if (n == 0)
+                    {
+                      wide_char = 0;
+                      n = 1;
+                    }
+                }
+
+              switch (wide_char)
+                {
+                case '\n':
+                  lines++;
+                  FALLTHROUGH;
+                case '\r':
+                case '\f':
+                  if (linepos > linelength)
+                    linelength = linepos;
+                  linepos = 0;
+                  goto mb_word_separator;
+                case '\t':
+                  linepos += 8 - (linepos % 8);
+                  goto mb_word_separator;
+                case ' ':
+                  linepos++;
+                  FALLTHROUGH;
+                case '\v':
+                mb_word_separator:
+                  words += in_word;
+                  in_word = false;
+                  break;
+                default:
+                  if (wide && iswprint (wide_char))
+                    {
+                      /* wcwidth can be expensive on OSX for example,
+                         so avoid if not needed.  */
+                      if (print_linelength)
+                        {
+                          int width = wcwidth (wide_char);
+                          if (width > 0)
+                            linepos += width;
+                        }
+                      if (iswspace (wide_char) || iswnbspace (wide_char))
+                        goto mb_word_separator;
+                      in_word = true;
+                    }
+                  else if (!wide && isprint (to_uchar (*p)))
+                    {
+                      linepos++;
+                      if (isspace (to_uchar (*p)))
+                        goto mb_word_separator;
+                      in_word = true;
+                    }
+                  break;
+                }
+
+              p += n;
+              bytes_read -= n;
+              chars++;
+            }
+          while (bytes_read > 0);
+
+# if SUPPORT_OLD_MBRTOWC
+          if (bytes_read > 0)
+            {
+              if (bytes_read == BUFFER_SIZE)
+                {
+                  /* Encountered a very long redundant shift sequence.  */
+                  p++;
+                  bytes_read--;
+                }
+              memmove (buf, p, bytes_read);
+            }
+          prev = bytes_read;
+# endif
+        }
+      /* Account for a final line or word not ended by a separator.  */
+      if (linepos > linelength)
+        linelength = linepos;
+      words += in_word;
+    }
+#endif
+  else
+    {
+      bool in_word = false;
+      uintmax_t linepos = 0;
+
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+        {
+          char const *p = buf;
+          if (bytes_read == SAFE_READ_ERROR)
+            {
+              error (0, errno, "%s", quotef (file));
+              ok = false;
+              break;
+            }
+
+          bytes += bytes_read;
+          do
+            {
+              switch (*p++)
+                {
+                case '\n':
+                  lines++;
+                  FALLTHROUGH;
+                case '\r':
+                case '\f':
+                  if (linepos > linelength)
+                    linelength = linepos;
+                  linepos = 0;
+                  goto word_separator;
+                case '\t':
+                  linepos += 8 - (linepos % 8);
+                  goto word_separator;
+                case ' ':
+                  linepos++;
+                  FALLTHROUGH;
+                case '\v':
+                word_separator:
+                  words += in_word;
+                  in_word = false;
+                  break;
+                default:
+                  if (isprint (to_uchar (p[-1])))
+                    {
+                      linepos++;
+                      if (isspace (to_uchar (p[-1]))
+                          || isnbspace (to_uchar (p[-1])))
+                        goto word_separator;
+                      in_word = true;
+                    }
+                  break;
+                }
+            }
+          while (--bytes_read);
+        }
+      /* Account for a final line or word not ended by a separator.  */
+      if (linepos > linelength)
+        linelength = linepos;
+      words += in_word;
+    }
+
+  /* Characters were requested but counted as bytes (single-byte locale).  */
+  if (count_chars < print_chars)
+    chars = bytes;
+
+  if (total_mode != total_only)
+    write_counts (lines, words, chars, bytes, linelength, file_x);
+
+  if (ckd_add (&total_lines, total_lines, lines))
+    total_lines_overflow = true;
+  if (ckd_add (&total_words, total_words, words))
+    total_words_overflow = true;
+  if (ckd_add (&total_chars, total_chars, chars))
+    total_chars_overflow = true;
+  if (ckd_add (&total_bytes, total_bytes, bytes))
+    total_bytes_overflow = true;
+
+  if (linelength > max_line_length)
+    max_line_length = linelength;
+
+  return ok;
+}
+
+/* Count the contents of FILE and return true if everything succeeded.
+   A null FILE or the name "-" means standard input, which is switched
+   to binary mode and left open.  Any other name is opened read-only and
+   closed again afterwards; a failure to open or close it is diagnosed
+   and makes the result false.  FSTATUS is handed on to wc.  */
+static bool
+wc_file (char const *file, struct fstatus *fstatus)
+{
+  bool const use_stdin = ! file || STREQ (file, "-");
+
+  if (use_stdin)
+    {
+      have_read_stdin = true;
+      xset_binary_mode (STDIN_FILENO, O_BINARY);
+      /* The offset of standard input is unknown.  */
+      return wc (STDIN_FILENO, file, fstatus, -1);
+    }
+
+  int fd = open (file, O_RDONLY | O_BINARY);
+  if (fd == -1)
+    {
+      error (0, errno, "%s", quotef (file));
+      return false;
+    }
+
+  /* A freshly opened file is positioned at offset 0.  */
+  bool counted = wc (fd, file, fstatus, 0);
+  if (close (fd) != 0)
+    {
+      error (0, errno, "%s", quotef (file));
+      counted = false;
+    }
+  return counted;
+}
+
+/* Return a newly allocated array with the file status of each of the
+   NFILES files named by FILE (at least one element is always allocated).
+   When exactly one count is printed for exactly one file, a print width
+   of 1 suffices, so the file is not examined and only element 0 is
+   marked as "not yet called".  NFILES == 0 is handled the same way;
+   that happens when the length of the file name list is not known.
+   A null name or "-" refers to standard input.  */
+
+static struct fstatus *
+get_input_fstatus (size_t nfiles, char *const *file)
+{
+  size_t const slots = nfiles == 0 ? 1 : nfiles;
+  struct fstatus *result = xnmalloc (slots, sizeof *result);
+  int const selected_counts = (print_lines + print_words + print_chars
+                               + print_bytes + print_linelength);
+  bool const defer_stat = (nfiles == 0
+                           || (nfiles == 1 && selected_counts == 1));
+
+  if (defer_stat)
+    {
+      result[0].failed = 1;
+      return result;
+    }
+
+  for (size_t i = 0; i < nfiles; i++)
+    {
+      char const *name = file[i];
+
+      if (! name || STREQ (name, "-"))
+        result[i].failed = fstat (STDIN_FILENO, &result[i].st);
+      else
+        result[i].failed = stat (name, &result[i].st);
+    }
+
+  return result;
+}
+
+/* Return a print width suitable for the NFILES files whose status is
+   recorded in FSTATUS: the number of decimal digits in the summed size
+   of all regular files, but at least 7 as soon as one successfully
+   examined file is not regular.  Return 1 when NFILES is zero or when
+   get_input_fstatus skipped examining the files.  */
+
+ATTRIBUTE_PURE
+static int
+compute_number_width (size_t nfiles, struct fstatus const *fstatus)
+{
+  if (nfiles == 0 || 0 < fstatus[0].failed)
+    return 1;
+
+  uintmax_t regular_bytes = 0;
+  int floor_width = 1;
+
+  for (size_t i = 0; i < nfiles; i++)
+    {
+      struct fstatus const *entry = &fstatus[i];
+
+      if (entry->failed)
+        continue;
+      if (S_ISREG (entry->st.st_mode))
+        regular_bytes += entry->st.st_size;
+      else
+        floor_width = 7;
+    }
+
+  int digits = 1;
+  while (10 <= regular_bytes)
+    {
+      regular_bytes /= 10;
+      digits++;
+    }
+
+  return digits < floor_width ? floor_width : digits;
+}
+
+
+/* Program entry point: parse the options, build the list of input names
+ (from the command line, or from the --files0-from file), count each
+ input with wc_file and finally print the total line when the --total
+ mode asks for it.
+ Return EXIT_SUCCESS if every input was processed without error and no
+ printed total overflowed, EXIT_FAILURE otherwise. */
+int
+main (int argc, char **argv)
+{
+ bool ok;
+ int optc;
+ size_t nfiles;
+ char **files;
+ char *files_from = nullptr;
+ struct fstatus *fstatus;
+ struct Tokens tok;
+
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+
+ atexit (close_stdout);
+
+ page_size = getpagesize ();
+ /* Line buffer stdout to ensure lines are written atomically and immediately
+ so that processes running in parallel do not intersperse their output. */
+ setvbuf (stdout, nullptr, _IOLBF, 0);
+
+ posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
+
+ print_lines = print_words = print_chars = print_bytes = false;
+ print_linelength = false;
+ total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
+
+ while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
+ switch (optc)
+ {
+ case 'c':
+ print_bytes = true;
+ break;
+
+ case 'm':
+ print_chars = true;
+ break;
+
+ case 'l':
+ print_lines = true;
+ break;
+
+ case 'w':
+ print_words = true;
+ break;
+
+ case 'L':
+ print_linelength = true;
+ break;
+
+ case DEBUG_PROGRAM_OPTION:
+ debug = true;
+ break;
+
+ case FILES0_FROM_OPTION:
+ files_from = optarg;
+ break;
+
+ case TOTAL_OPTION:
+ total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
+ break;
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
+
+ /* With no count selected, print lines, words and bytes. */
+ if (! (print_lines || print_words || print_chars || print_bytes
+ || print_linelength))
+ print_lines = print_words = print_bytes = true;
+
+ bool read_tokens = false;
+ struct argv_iterator *ai;
+ if (files_from)
+ {
+ FILE *stream;
+
+ /* When using --files0-from=F, you may not specify any files
+ on the command-line. */
+ if (optind < argc)
+ {
+ error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
+ fprintf (stderr, "%s\n",
+ _("file operands cannot be combined with --files0-from"));
+ usage (EXIT_FAILURE);
+ }
+
+ if (STREQ (files_from, "-"))
+ stream = stdin;
+ else
+ {
+ stream = fopen (files_from, "r");
+ if (stream == nullptr)
+ error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
+ quoteaf (files_from));
+ }
+
+ /* Read the file list into RAM if we can detect its size and that
+ size is reasonable. Otherwise, we'll read a name at a time. */
+ struct stat st;
+ if (fstat (fileno (stream), &st) == 0
+ && S_ISREG (st.st_mode)
+ && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
+ {
+ read_tokens = true;
+ readtokens0_init (&tok);
+ if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
+ error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
+ quoteaf (files_from));
+ files = tok.tok;
+ nfiles = tok.n_tok;
+ ai = argv_iter_init_argv (files);
+ }
+ else
+ {
+ /* The number of names is unknown: NFILES == 0 tells the code
+ below to reuse fstatus[0] for every file. */
+ files = nullptr;
+ nfiles = 0;
+ ai = argv_iter_init_stream (stream);
+ }
+ }
+ else
+ {
+ /* Without file operands, use a single null name, which wc_file
+ treats as standard input. */
+ static char *stdin_only[] = { nullptr };
+ files = (optind < argc ? argv + optind : stdin_only);
+ nfiles = (optind < argc ? argc - optind : 1);
+ ai = argv_iter_init_argv (files);
+ }
+
+ if (!ai)
+ xalloc_die ();
+
+ fstatus = get_input_fstatus (nfiles, files);
+ if (total_mode == total_only)
+ number_width = 1; /* No extra padding, since no alignment requirement. */
+ else
+ number_width = compute_number_width (nfiles, fstatus);
+
+ ok = true;
+ for (int i = 0; /* */; i++)
+ {
+ bool skip_file = false;
+ enum argv_iter_err ai_err;
+ char *file_name = argv_iter (ai, &ai_err);
+ if (!file_name)
+ {
+ switch (ai_err)
+ {
+ case AI_ERR_EOF:
+ goto argv_iter_done;
+ case AI_ERR_READ:
+ error (0, errno, _("%s: read error"),
+ quotef (files_from));
+ ok = false;
+ goto argv_iter_done;
+ case AI_ERR_MEM:
+ xalloc_die ();
+ default:
+ affirm (!"unexpected error code from argv_iter");
+ }
+ }
+ if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
+ {
+ /* Give a better diagnostic in an unusual case:
+ printf - | wc --files0-from=- */
+ error (0, 0, _("when reading file names from stdin, "
+ "no file name of %s allowed"),
+ quoteaf (file_name));
+ skip_file = true;
+ }
+
+ if (!file_name[0])
+ {
+ /* Diagnose a zero-length file name. When it's one
+ among many, knowing the record number may help.
+ FIXME: currently print the record number only with
+ --files0-from=FILE. Maybe do it for argv, too? */
+ if (files_from == nullptr)
+ error (0, 0, "%s", _("invalid zero-length file name"));
+ else
+ {
+ /* Using the standard 'filename:line-number:' prefix here is
+ not totally appropriate, since NUL is the separator, not NL,
+ but it might be better than nothing. */
+ unsigned long int file_number = argv_iter_n_args (ai);
+ error (0, 0, "%s:%lu: %s", quotef (files_from),
+ file_number, _("invalid zero-length file name"));
+ }
+ skip_file = true;
+ }
+
+ if (skip_file)
+ ok = false;
+ else
+ ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
+
+ /* fstatus[0] is shared by all files when their number is unknown,
+ so mark it as "not yet called" again for the next one. */
+ if (! nfiles)
+ fstatus[0].failed = 1;
+ }
+ argv_iter_done:
+
+ /* No arguments on the command line is fine. That means read from stdin.
+ However, no names on the --files0-from input stream means
+ don't read anything. */
+ if (ok && !files_from && argv_iter_n_args (ai) == 0)
+ ok &= wc_file (nullptr, &fstatus[0]);
+
+ if (read_tokens)
+ readtokens0_free (&tok);
+
+ /* Print the total line: always for "always" and "only", and in "auto"
+ mode only when more than one name was processed. */
+ if (total_mode != total_never
+ && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
+ {
+ /* A total that overflowed is reported and printed as UINTMAX_MAX. */
+ if (total_lines_overflow)
+ {
+ total_lines = UINTMAX_MAX;
+ error (0, EOVERFLOW, _("total lines"));
+ ok = false;
+ }
+ if (total_words_overflow)
+ {
+ total_words = UINTMAX_MAX;
+ error (0, EOVERFLOW, _("total words"));
+ ok = false;
+ }
+ if (total_chars_overflow)
+ {
+ total_chars = UINTMAX_MAX;
+ error (0, EOVERFLOW, _("total characters"));
+ ok = false;
+ }
+ if (total_bytes_overflow)
+ {
+ total_bytes = UINTMAX_MAX;
+ error (0, EOVERFLOW, _("total bytes"));
+ ok = false;
+ }
+
+ write_counts (total_lines, total_words, total_chars, total_bytes,
+ max_line_length,
+ total_mode != total_only ? _("total") : nullptr);
+ }
+
+ argv_iter_free (ai);
+
+ free (fstatus);
+
+ if (have_read_stdin && close (STDIN_FILENO) != 0)
+ error (EXIT_FAILURE, errno, "-");
+
+ return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/historical/plan9-wc.c b/historical/plan9-wc.c
@@ -0,0 +1,118 @@
+/*
+ * Count bytes within runes, if it fits in a uvlong, and other things.
+ */
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/* output-selection flags, per-file counts, and running totals across all inputs */
+static int pline, pword, prune, pbadr, pchar; /* set by -l -w -r -b -c in main() */
+static uvlong nline, nword, nrune, nbadr, nchar; /* counts for the input most recently scanned by wc() */
+static uvlong tnline, tnword, tnrune, tnbadr, tnchar; /* sums of the per-file counts, added up by wc() */
+
+enum{Space, Word}; /* scanner position in wc(): between words or inside one */
+
+/*
+ * Scan bin one rune at a time, leaving the counts for this input in
+ * nline, nword, nrune, nbadr and nchar and adding them to the totals.
+ */
+static void
+wc(Biobuf *bin)
+{
+    long c;
+    int state;
+
+    nline = nword = nrune = nbadr = 0;
+    state = Space;
+    for(;;){
+        c = Bgetrune(bin);
+        if(c < 0)
+            break;      /* a negative return ends the scan */
+        nrune++;
+        if(c == Runeerror){
+            nbadr++;    /* a bad rune leaves the word state untouched */
+            continue;
+        }
+        if(c == '\n')
+            nline++;
+        if(isspacerune(c))
+            state = Space;
+        else if(state == Space){
+            state = Word;
+            nword++;
+        }
+    }
+    nchar = Boffset(bin);   /* character count comes from the stream offset */
+    tnline += nline; tnword += nword;
+    tnrune += nrune; tnbadr += nbadr;
+    tnchar += nchar;
+}
+
+/*
+ * Print the counts selected by the p* flags, then fname unless it is nil.
+ */
+static void
+report(uvlong nline, uvlong nword, uvlong nrune, uvlong nbadr, uvlong nchar, char *fname)
+{
+    uvlong val[5];
+    int want[5], i;
+    char buf[1024], *p, *end;
+
+    val[0] = nline; val[1] = nword; val[2] = nrune; val[3] = nbadr; val[4] = nchar;
+    want[0] = pline; want[1] = pword; want[2] = prune; want[3] = pbadr; want[4] = pchar;
+    buf[0] = 0;
+    p = buf;
+    end = buf + sizeof buf;
+    for(i = 0; i < 5; i++)
+        if(want[i])
+            p = seprint(p, end, " %7llud", val[i]);
+    if(fname != nil)
+        seprint(p, end, " %s", fname);
+    print("%s\n", buf+1);   /* +1 skips the blank in front of the first field */
+}
+
+/*
+ * Parse -lwrbc (default -lwc), then count standard input, or each named
+ * file followed by a "total" line when two or more files are named.
+ */
+void
+main(int argc, char *argv[])
+{
+    Biobuf in, *b;
+    char *status, **fp;
+
+    status = nil;
+    ARGBEGIN {
+    case 'b': pbadr++; break;
+    case 'c': pchar++; break;
+    case 'l': pline++; break;
+    case 'r': prune++; break;
+    case 'w': pword++; break;
+    default:
+        fprint(2, "Usage: %s [-lwrbc] [file ...]\n", argv0);
+        exits("usage");
+    } ARGEND
+    if(!pline && !pword && !prune && !pbadr && !pchar)
+        pline = pword = pchar = 1;
+    if(argc == 0){
+        Binit(&in, 0, OREAD);   /* fd 0: standard input, reported without a name */
+        wc(&in);
+        report(nline, nword, nrune, nbadr, nchar, nil);
+        Bterm(&in);
+        exits(status);
+    }
+    for(fp = argv; fp < argv+argc; fp++){
+        b = Bopen(*fp, OREAD);
+        if(b != nil){
+            wc(b);
+            report(nline, nword, nrune, nbadr, nchar, *fp);
+            Bterm(b);
+        }else{
+            perror(*fp);
+            status = "can't open";  /* remembered as the exit status; keep going */
+        }
+    }
+    if(argc > 1)
+        report(tnline, tnword, tnrune, tnbadr, tnchar, "total");
+    exits(status);
+}
diff --git a/historical/plan9port-wc.c b/historical/plan9port-wc.c
@@ -0,0 +1,352 @@
+/*
+ * wc -- count things in utf-encoded text files
+ * Bugs:
+ * The only white space characters recognized are ' ', '\t' and '\n', even though
+ * ISO 10646 has many more blanks scattered through it.
+ * Should count characters that cannot occur in any rune (hex f0-ff) separately.
+ * Should count non-canonical runes (e.g. hex c1,80 instead of hex 40).
+ */
+#include <u.h>
+#include <libc.h>
+#define NBUF (8*1024) /* size of the buffer count() fills with each read */
+uvlong nline, tnline, pline; /* lines: per-file count, running total, -l flag */
+uvlong nword, tnword, pword; /* words: per-file count, running total, -w flag */
+uvlong nrune, tnrune, prune; /* runes: per-file count, running total, -r flag */
+uvlong nbadr, tnbadr, pbadr; /* bad runes: per-file count, running total, -b flag */
+uvlong nchar, tnchar, pchar; /* bytes: per-file count, running total, -c flag */
+void count(int, char *); /* count one open file descriptor and report it */
+void report(uvlong, uvlong, uvlong, uvlong, uvlong, char *); /* print one line of selected counts */
+/*
+ * Parse the -lwrbc flags (default -lwc), then count standard input
+ * or each named file, with a "total" line after two or more names.
+ */
+void
+main(int argc, char *argv[])
+{
+    char *sts, **ap;
+    int fd;
+
+    sts = "";
+    ARGBEGIN {
+    case 'b': pbadr++; break;
+    case 'c': pchar++; break;
+    case 'l': pline++; break;
+    case 'r': prune++; break;
+    case 'w': pword++; break;
+    default:
+        fprint(2, "Usage: %s [-lwrbc] [file ...]\n", argv0);
+        exits("usage");
+    } ARGEND
+    if(pline+pword+prune+pbadr+pchar == 0)
+        pline = pword = pchar = 1;
+    if(argc == 0){
+        count(0, 0);    /* fd 0 is standard input; no name is printed */
+        exits(sts);
+    }
+    for(ap = argv; ap != argv+argc; ap++){
+        fd = open(*ap, OREAD);
+        if(fd < 0){
+            perror(*ap);
+            sts = "can't open";
+            continue;
+        }
+        count(fd, *ap);     /* leaves this file's numbers in the n* globals */
+        tnline += nline; tnword += nword;
+        tnrune += nrune; tnbadr += nbadr;
+        tnchar += nchar;
+        close(fd);
+    }
+    if(argc > 1)
+        report(tnline, tnword, tnrune, tnbadr, tnchar, "total");
+    exits(sts);
+}
+/*
+ * Print the counts selected by the p* flags as " %7llud" fields (minus
+ * the first field's leading blank), then " fname" if fname is non-nil.
+ * Counts are bounded by seprint and the name goes straight to print, so
+ * a file name of any length is neither truncated nor copied into a buffer.
+ */
+void
+report(uvlong nline, uvlong nword, uvlong nrune, uvlong nbadr, uvlong nchar, char *fname)
+{
+    char line[1024], *s, *e;
+
+    s = line;
+    e = line + sizeof line;
+    line[0] = '\0';
+    if(pline)
+        s = seprint(s, e, " %7llud", nline);
+    if(pword)
+        s = seprint(s, e, " %7llud", nword);
+    if(prune)
+        s = seprint(s, e, " %7llud", nrune);
+    if(pbadr)
+        s = seprint(s, e, " %7llud", nbadr);
+    if(pchar)
+        s = seprint(s, e, " %7llud", nchar);
+    if(s == line)   /* nothing selected: emit just the name, if any */
+        print("%s\n", fname ? fname : "");
+    else if(fname)
+        print("%s %s\n", line+1, fname);
+    else
+        print("%s\n", line+1);
+}
+/*
+ * How it works. Start in statesp. Each time we read a character,
+ * increment various counts, and do state transitions according to the
+ * following table. If we're not in statesp or statewd when done, the
+ * file ends with a partial rune.
+ * | character
+ * state |09,20| 0a |00-7f|80-bf|c0-df|e0-ef|f0-f7|f8-ff
+ * -------+-----+-----+-----+-----+-----+-----+-----+-----
+ * statesp|ASP |ASPN |AWDW |AWDWX|AC2W |AC3W |AC4W |AWDWX
+ * statewd|ASP |ASPN |AWD |AWDX |AC2 |AC3 |AC4 |AWDX
+ * statec2|ASPX |ASPNX|AWDX |AWDR |AC2X |AC3X |AC4X |AWDX
+ * statec3|ASPX |ASPNX|AWDX |AC2R |AC2X |AC3X |AC4X |AWDX
+ * statec4|ASPX |ASPNX|AWDX |AC3R |AC2X |AC3X |AC4X |AWDX f4 8f bf bf
+ */
+enum{ /* actions: the values stored in the state tables below and dispatched on in count() */
+ AC2, /* enter statec2 */
+ AC2R, /* enter statec2, don't count a rune */
+ AC2W, /* enter statec2, count a word */
+ AC2X, /* enter statec2, count a bad rune */
+ AC3, /* enter statec3 */
+ AC3R, /* enter statec3, don't count a rune */
+ AC3W, /* enter statec3, count a word */
+ AC3X, /* enter statec3, count a bad rune */
+ AC4, /* enter statec4 */
+ AC4W, /* enter statec4, count a word */
+ AC4X, /* enter statec4, count a bad rune */
+ ASP, /* enter statesp */
+ ASPN, /* enter statesp, count a newline */
+ ASPNX, /* enter statesp, count a newline, count a bad rune */
+ ASPX, /* enter statesp, count a bad rune */
+ AWD, /* enter statewd */
+ AWDR, /* enter statewd, don't count a rune */
+ AWDW, /* enter statewd, count a word */
+ AWDWX, /* enter statewd, count a word, count a bad rune */
+ AWDX, /* enter statewd, count a bad rune */
+};
+uchar statesp[256]={ /* action for each input byte while between words, looking for the start of a word */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 00-07 */
+AWDW, ASP, ASPN, AWDW, AWDW, AWDW, AWDW, AWDW, /* 08-0f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 10-17 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 18-1f */
+ASP, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 20-27 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 28-2f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 30-37 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 38-3f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 40-47 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 48-4f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 50-57 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 58-5f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 60-67 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 68-6f */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 70-77 */
+AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, AWDW, /* 78-7f */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 80-87 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 88-8f */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 90-97 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* 98-9f */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* a0-a7 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* a8-af */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* b0-b7 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* b8-bf */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* c0-c7 */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* c8-cf */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* d0-d7 */
+AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, AC2W, /* d8-df */
+AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, /* e0-e7 */
+AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, AC3W, /* e8-ef */
+AC4W, AC4W, AC4W, AC4W, AC4W, AC4W, AC4W, AC4W, /* f0-f7 */
+AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,AWDWX,/* f8-ff */
+};
+uchar statewd[256]={ /* action for each input byte while inside a word, looking for its next character */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 00-07 */
+AWD, ASP, ASPN, AWD, AWD, AWD, AWD, AWD, /* 08-0f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 10-17 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 18-1f */
+ASP, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 20-27 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 28-2f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 30-37 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 38-3f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 40-47 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 48-4f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 50-57 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 58-5f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 60-67 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 68-6f */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 70-77 */
+AWD, AWD, AWD, AWD, AWD, AWD, AWD, AWD, /* 78-7f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 80-87 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 88-8f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 90-97 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 98-9f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* a0-a7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* a8-af */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* b0-b7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* b8-bf */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* c0-c7 */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* c8-cf */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* d0-d7 */
+AC2, AC2, AC2, AC2, AC2, AC2, AC2, AC2, /* d8-df */
+AC3, AC3, AC3, AC3, AC3, AC3, AC3, AC3, /* e0-e7 */
+AC3, AC3, AC3, AC3, AC3, AC3, AC3, AC3, /* e8-ef */
+AC4, AC4, AC4, AC4, AC4, AC4, AC4, AC4, /* f0-f7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */
+};
+uchar statec2[256]={ /* action for each input byte when one more 10xxxxxx byte is needed to complete a rune */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 00-07 */
+AWDX, ASPX, ASPNX,AWDX, AWDX, AWDX, AWDX, AWDX, /* 08-0f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 10-17 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 18-1f */
+ASPX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 20-27 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 28-2f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 30-37 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 38-3f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 40-47 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 48-4f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 50-57 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 58-5f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 60-67 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 68-6f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 70-77 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 78-7f */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 80-87 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 88-8f */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 90-97 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* 98-9f */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* a0-a7 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* a8-af */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* b0-b7 */
+AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, AWDR, /* b8-bf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c0-c7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c8-cf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d0-d7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d8-df */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e0-e7 */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e8-ef */
+AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, /* f0-f7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */
+};
+uchar statec3[256]={ /* action for each input byte when two more bytes (10xxxxxx,10xxxxxx) are needed to complete a rune */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 00-07 */
+AWDX, ASPX, ASPNX,AWDX, AWDX, AWDX, AWDX, AWDX, /* 08-0f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 10-17 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 18-1f */
+ASPX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 20-27 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 28-2f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 30-37 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 38-3f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 40-47 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 48-4f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 50-57 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 58-5f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 60-67 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 68-6f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 70-77 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 78-7f */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 80-87 */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 88-8f */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 90-97 */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* 98-9f */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* a0-a7 */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* a8-af */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* b0-b7 */
+AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, AC2R, /* b8-bf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c0-c7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c8-cf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d0-d7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d8-df */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e0-e7 */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e8-ef */
+AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, /* f0-f7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */
+};
+uchar statec4[256]={ /* looking for 10xxxxxx,10xxxxxx,10xxxxxx to complete a rune: every continuation byte 80-bf moves on to statec3 without counting a rune */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 00-07 */
+AWDX, ASPX, ASPNX,AWDX, AWDX, AWDX, AWDX, AWDX, /* 08-0f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 10-17 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 18-1f */
+ASPX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 20-27 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 28-2f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 30-37 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 38-3f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 40-47 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 48-4f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 50-57 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 58-5f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 60-67 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 68-6f */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 70-77 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* 78-7f */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 80-87 */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 88-8f */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 90-97 */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* 98-9f */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* a0-a7 */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* a8-af */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* b0-b7 */
+AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, AC3R, /* b8-bf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c0-c7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* c8-cf */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d0-d7 */
+AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, AC2X, /* d8-df */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e0-e7 */
+AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, AC3X, /* e8-ef */
+AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, AC4X, /* f0-f7 */
+AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, AWDX, /* f8-ff */
+};
+/*
+ * Count the lines, words, runes, bad runes and bytes readable from fd f
+ * into the n* globals, then report them under name.  Each byte is looked
+ * up in the table for the current state; the action found there says
+ * which table to use next and which counters to adjust.
+ */
+void
+count(int f, char *name)
+{
+    uchar buf[NBUF], *p, *tab;
+    int n, act;
+
+    nline = nword = nrune = nbadr = nchar = 0;
+    tab = statesp;
+
+    while((n = read(f, buf, NBUF)) > 0){
+        nchar += n;
+        nrune += n;     /* one per byte for now; the *R actions take bytes back */
+        for(p = buf; p != buf+n; p++){
+            act = tab[*p];
+            switch(act){    /* which table handles the next byte */
+            case AC2: case AC2R: case AC2W: case AC2X:
+                tab = statec2;
+                break;
+            case AC3: case AC3R: case AC3W: case AC3X:
+                tab = statec3;
+                break;
+            case AC4: case AC4W: case AC4X:
+                tab = statec4;
+                break;
+            case ASP: case ASPN: case ASPNX: case ASPX:
+                tab = statesp;
+                break;
+            case AWD: case AWDR: case AWDW: case AWDWX: case AWDX:
+                tab = statewd;
+                break;
+            }
+            switch(act){    /* which counters this byte touches */
+            case AC2R: case AC3R: case AWDR: --nrune; break;
+            case AC2W: case AC3W: case AC4W: case AWDW: nword++; break;
+            case AC2X: case AC3X: case AC4X: case ASPX: case AWDX: nbadr++; break;
+            case ASPN: nline++; break;
+            case ASPNX: nline++; nbadr++; break;
+            case AWDWX: nword++; nbadr++; break;
+            }
+        }
+    }
+    if(tab != statesp && tab != statewd)
+        nbadr++;        /* input ended in the middle of a rune */
+    if(n < 0)
+        fprint(2, "%s: %r\n", name);
+    report(nline, nword, nrune, nbadr, nchar, name);
+}
diff --git a/historical/unix-v7-wc.c b/historical/unix-v7-wc.c
@@ -0,0 +1,86 @@
+/* wc line and word count */
+
+#include <stdio.h>
+
+/*
+ * Count lines, words and characters in each named file, or in stdin when
+ * none is named; letters after a leading '-' choose what wcp prints.
+ */
+main(argc, argv)
+char **argv;
+{
+    register FILE *fp;
+    register int c;
+    int i, inword;
+    char *opts;
+    long linect, wordct, charct;
+    long tlinect, twordct, tcharct;
+
+    opts = "lwc";
+    if(argc > 1 && argv[1][0] == '-') {
+        argv++;
+        argc--;
+        opts = ++argv[0];   /* step past the '-' */
+    }
+
+    tlinect = twordct = tcharct = 0;
+    fp = stdin;
+    i = 1;
+    do {
+        if(argc > 1) {
+            fp = fopen(argv[i], "r");
+            if(fp == NULL) {
+                fprintf(stderr, "wc: can't open %s\n", argv[i]);
+                continue;
+            }
+        }
+        linect = wordct = charct = 0;
+        inword = 0;
+        while((c = getc(fp)) != EOF) {
+            charct++;
+            if(c > ' ' && c < 0177) {   /* printing character */
+                if(inword == 0) {
+                    inword = 1;
+                    wordct++;
+                }
+            } else if(c == '\n') {
+                linect++;
+                inword = 0;
+            } else if(c == ' ' || c == '\t')
+                inword = 0;     /* any other byte leaves the word state alone */
+        }
+        wcp(opts, charct, wordct, linect);
+        if(argc > 1)
+            printf(" %s\n", argv[i]);
+        else
+            printf("\n");
+        fclose(fp);
+        tlinect += linect;
+        twordct += wordct;
+        tcharct += charct;
+    } while(++i < argc);
+    if(argc > 2) {
+        wcp(opts, tcharct, twordct, tlinect);
+        printf(" total\n");
+    }
+    exit(0);
+}
+
+/*
+ * Print the counts named by the option letters in wd, in the order the
+ * letters appear: 'l' lines, 'w' words, 'c' characters.  Other letters
+ * print nothing.
+ */
+wcp(wd, charct, wordct, linect)
+register char *wd;
+long charct, wordct, linect;
+{
+    for(; *wd != '\0'; wd++) {
+        if(*wd == 'l')
+            printf("%7ld", linect);
+        else if(*wd == 'w')
+            printf("%7ld ", wordct);    /* only this field has a trailing blank */
+        else if(*wd == 'c')
+            printf("%7ld", charct);
+    }
+}