wc

Count words in <50 lines of C
Log | Files | Refs | README

gnu-wc.c (30690B)


      1 /* wc - print the number of lines, words, and bytes in files
      2    Copyright (C) 1985-2023 Free Software Foundation, Inc.
      3 
      4    This program is free software: you can redistribute it and/or modify
      5    it under the terms of the GNU General Public License as published by
      6    the Free Software Foundation, either version 3 of the License, or
      7    (at your option) any later version.
      8 
      9    This program is distributed in the hope that it will be useful,
     10    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12    GNU General Public License for more details.
     13 
     14    You should have received a copy of the GNU General Public License
     15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     16 
     17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
     18    and David MacKenzie, djm@gnu.ai.mit.edu. */
     19 
     20 #include <config.h>
     21 
     22 #include <stdckdint.h>
     23 #include <stdio.h>
     24 #include <getopt.h>
     25 #include <sys/types.h>
     26 #include <wchar.h>
     27 #include <wctype.h>
     28 
     29 #include "system.h"
     30 #include "assure.h"
     31 #include "argmatch.h"
     32 #include "argv-iter.h"
     33 #include "fadvise.h"
     34 #include "mbchar.h"
     35 #include "physmem.h"
     36 #include "readtokens0.h"
     37 #include "safe-read.h"
     38 #include "stat-size.h"
     39 #include "xbinary-io.h"
     40 
     41 #if !defined iswspace && !HAVE_ISWSPACE
     42 # define iswspace(wc) \
     43     ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
     44 #endif
     45 
     46 /* The official name of this program (e.g., no 'g' prefix).  */
     47 #define PROGRAM_NAME "wc"
     48 
     49 #define AUTHORS \
     50   proper_name ("Paul Rubin"), \
     51   proper_name ("David MacKenzie")
     52 
     53 /* Size of atomic reads. */
     54 #define BUFFER_SIZE (16 * 1024)
     55 
     56 #ifdef USE_AVX2_WC_LINECOUNT
     57 /* From wc_avx2.c */
     58 extern bool
     59 wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
     60                uintmax_t *bytes_out);
     61 #endif
     62 
     63 static bool debug;
     64 
     65 /* Cumulative number of lines, words, chars and bytes in all files so far.
     66    max_line_length is the maximum over all files processed so far.  */
     67 static uintmax_t total_lines;
     68 static uintmax_t total_words;
     69 static uintmax_t total_chars;
     70 static uintmax_t total_bytes;
     71 static uintmax_t total_lines_overflow;
     72 static uintmax_t total_words_overflow;
     73 static uintmax_t total_chars_overflow;
     74 static uintmax_t total_bytes_overflow;
     75 static uintmax_t max_line_length;
     76 
     77 /* Which counts to print. */
     78 static bool print_lines, print_words, print_chars, print_bytes;
     79 static bool print_linelength;
     80 
     81 /* The print width of each count.  */
     82 static int number_width;
     83 
     84 /* True if we have ever read the standard input. */
     85 static bool have_read_stdin;
     86 
     87 /* Used to determine if file size can be determined without reading.  */
     88 static size_t page_size;
     89 
     90 /* Enable to _not_ treat non breaking space as a word separator.  */
     91 static bool posixly_correct;
     92 
     93 /* The result of calling fstat or stat on a file descriptor or file.  */
/* The result of calling fstat or stat on a file descriptor or file.
   One of these is kept per input file so that the status gathered for
   computing the column width can be reused when counting.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};

/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};

/* Long options accepted by getopt_long in main.  */
static struct option const longopts[] =
{
  {"bytes", no_argument, nullptr, 'c'},
  {"chars", no_argument, nullptr, 'm'},
  {"lines", no_argument, nullptr, 'l'},
  {"words", no_argument, nullptr, 'w'},
  {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
  {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
  {"max-line-length", no_argument, nullptr, 'L'},
  {"total", required_argument, nullptr, TOTAL_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};

/* The possible values of --total=WHEN.  */
enum total_type
  {
    total_auto,         /* 0: default or --total=auto */
    total_always,       /* 1: --total=always */
    total_only,         /* 2: --total=only */
    total_never         /* 3: --total=never */
  };
/* Argument spellings for --total; kept in the same order as
   total_types so that entry I of one corresponds to entry I of the
   other.  */
static char const *const total_args[] =
{
  "auto", "always", "only", "never", nullptr
};
static enum total_type const total_types[] =
{
  total_auto, total_always, total_only, total_never
};
ARGMATCH_VERIFY (total_args, total_types);
/* The mode selected by --total, or total_auto if the option was not
   given.  */
static enum total_type total_mode = total_auto;
    145 
    146 #ifdef USE_AVX2_WC_LINECOUNT
    147 static bool
    148 avx2_supported (void)
    149 {
    150   bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
    151 
    152   if (debug)
    153     error (0, 0, (avx_enabled
    154                   ? _("using avx2 hardware support")
    155                   : _("avx2 support not detected")));
    156 
    157   return avx_enabled;
    158 }
    159 #endif
    160 
    161 void
    162 usage (int status)
    163 {
    164   if (status != EXIT_SUCCESS)
    165     emit_try_help ();
    166   else
    167     {
    168       printf (_("\
    169 Usage: %s [OPTION]... [FILE]...\n\
    170   or:  %s [OPTION]... --files0-from=F\n\
    171 "),
    172               program_name, program_name);
    173       fputs (_("\
    174 Print newline, word, and byte counts for each FILE, and a total line if\n\
    175 more than one FILE is specified.  A word is a non-zero-length sequence of\n\
    176 printable characters delimited by white space.\n\
    177 "), stdout);
    178 
    179       emit_stdin_note ();
    180 
    181       fputs (_("\
    182 \n\
    183 The options below may be used to select which counts are printed, always in\n\
    184 the following order: newline, word, character, byte, maximum line length.\n\
    185   -c, --bytes            print the byte counts\n\
    186   -m, --chars            print the character counts\n\
    187   -l, --lines            print the newline counts\n\
    188 "), stdout);
    189       fputs (_("\
    190       --files0-from=F    read input from the files specified by\n\
    191                            NUL-terminated names in file F;\n\
    192                            If F is - then read names from standard input\n\
    193   -L, --max-line-length  print the maximum display width\n\
    194   -w, --words            print the word counts\n\
    195 "), stdout);
    196       fputs (_("\
    197       --total=WHEN       when to print a line with total counts;\n\
    198                            WHEN can be: auto, always, only, never\n\
    199 "), stdout);
    200       fputs (HELP_OPTION_DESCRIPTION, stdout);
    201       fputs (VERSION_OPTION_DESCRIPTION, stdout);
    202       emit_ancillary_info (PROGRAM_NAME);
    203     }
    204   exit (status);
    205 }
    206 
    207 /* Return non zero if a non breaking space.  */
    208 ATTRIBUTE_PURE
    209 static int
    210 iswnbspace (wint_t wc)
    211 {
    212   return ! posixly_correct
    213          && (wc == 0x00A0 || wc == 0x2007
    214              || wc == 0x202F || wc == 0x2060);
    215 }
    216 
    217 static int
    218 isnbspace (int c)
    219 {
    220   return iswnbspace (btowc (c));
    221 }
    222 
    223 /* FILE is the name of the file (or null for standard input)
    224    associated with the specified counters.  */
    225 static void
    226 write_counts (uintmax_t lines,
    227               uintmax_t words,
    228               uintmax_t chars,
    229               uintmax_t bytes,
    230               uintmax_t linelength,
    231               char const *file)
    232 {
    233   static char const format_sp_int[] = " %*s";
    234   char const *format_int = format_sp_int + 1;
    235   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
    236 
    237   if (print_lines)
    238     {
    239       printf (format_int, number_width, umaxtostr (lines, buf));
    240       format_int = format_sp_int;
    241     }
    242   if (print_words)
    243     {
    244       printf (format_int, number_width, umaxtostr (words, buf));
    245       format_int = format_sp_int;
    246     }
    247   if (print_chars)
    248     {
    249       printf (format_int, number_width, umaxtostr (chars, buf));
    250       format_int = format_sp_int;
    251     }
    252   if (print_bytes)
    253     {
    254       printf (format_int, number_width, umaxtostr (bytes, buf));
    255       format_int = format_sp_int;
    256     }
    257   if (print_linelength)
    258     {
    259       printf (format_int, number_width, umaxtostr (linelength, buf));
    260     }
    261   if (file)
    262     printf (" %s", strchr (file, '\n') ? quotef (file) : file);
    263   putchar ('\n');
    264 }
    265 
    266 static bool
    267 wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out)
    268 {
    269   size_t bytes_read;
    270   uintmax_t lines, bytes;
    271   char buf[BUFFER_SIZE + 1];
    272   bool long_lines = false;
    273 
    274   if (!lines_out || !bytes_out)
    275     {
    276       return false;
    277     }
    278 
    279   lines = bytes = 0;
    280 
    281   while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
    282     {
    283 
    284       if (bytes_read == SAFE_READ_ERROR)
    285         {
    286           error (0, errno, "%s", quotef (file));
    287           return false;
    288         }
    289 
    290       bytes += bytes_read;
    291 
    292       char *p = buf;
    293       char *end = buf + bytes_read;
    294       uintmax_t plines = lines;
    295 
    296       if (! long_lines)
    297         {
    298           /* Avoid function call overhead for shorter lines.  */
    299           while (p != end)
    300             lines += *p++ == '\n';
    301         }
    302       else
    303         {
    304           /* rawmemchr is more efficient with longer lines.  */
    305           *end = '\n';
    306           while ((p = rawmemchr (p, '\n')) < end)
    307             {
    308               ++p;
    309               ++lines;
    310             }
    311         }
    312 
    313       /* If the average line length in the block is >= 15, then use
    314           memchr for the next block, where system specific optimizations
    315           may outweigh function call overhead.
    316           FIXME: This line length was determined in 2015, on both
    317           x86_64 and ppc64, but it's worth re-evaluating in future with
    318           newer compilers, CPUs, or memchr() implementations etc.  */
    319       if (lines - plines <= bytes_read / 15)
    320         long_lines = true;
    321       else
    322         long_lines = false;
    323     }
    324 
    325   *bytes_out = bytes;
    326   *lines_out = lines;
    327 
    328   return true;
    329 }
    330 
    331 /* Count words.  FILE_X is the name of the file (or null for standard
    332    input) that is open on descriptor FD.  *FSTATUS is its status.
    333    CURRENT_POS is the current file offset if known, negative if unknown.
    334    Return true if successful.  */
    335 static bool
    336 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
    337 {
    338   bool ok = true;
    339   char buf[BUFFER_SIZE + 1];
    340   size_t bytes_read;
    341   uintmax_t lines, words, chars, bytes, linelength;
    342   bool count_bytes, count_chars, count_complicated;
    343   char const *file = file_x ? file_x : _("standard input");
    344 
    345   lines = words = chars = bytes = linelength = 0;
    346 
    347   /* If in the current locale, chars are equivalent to bytes, we prefer
    348      counting bytes, because that's easier.  */
    349 #if MB_LEN_MAX > 1
    350   if (MB_CUR_MAX > 1)
    351     {
    352       count_bytes = print_bytes;
    353       count_chars = print_chars;
    354     }
    355   else
    356 #endif
    357     {
    358       count_bytes = print_bytes || print_chars;
    359       count_chars = false;
    360     }
    361   count_complicated = print_words || print_linelength;
    362 
    363   /* Advise the kernel of our access pattern only if we will read().  */
    364   if (!count_bytes || count_chars || print_lines || count_complicated)
    365     fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
    366 
    367   /* When counting only bytes, save some line- and word-counting
    368      overhead.  If FD is a 'regular' Unix file, using lseek is enough
    369      to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
    370      bytes at a time until EOF.  Note that the 'size' (number of bytes)
    371      that wc reports is smaller than stats.st_size when the file is not
    372      positioned at its beginning.  That's why the lseek calls below are
    373      necessary.  For example the command
    374      '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
    375      should make wc report '0' bytes.  */
    376 
    377   if (count_bytes && !count_chars && !print_lines && !count_complicated)
    378     {
    379       bool skip_read = false;
    380 
    381       if (0 < fstatus->failed)
    382         fstatus->failed = fstat (fd, &fstatus->st);
    383 
    384       /* For sized files, seek to one st_blksize before EOF rather than to EOF.
    385          This works better for files in proc-like file systems where
    386          the size is only approximate.  */
    387       if (! fstatus->failed && usable_st_size (&fstatus->st)
    388           && 0 <= fstatus->st.st_size)
    389         {
    390           off_t end_pos = fstatus->st.st_size;
    391           if (current_pos < 0)
    392             current_pos = lseek (fd, 0, SEEK_CUR);
    393 
    394           if (end_pos % page_size)
    395             {
    396               /* We only need special handling of /proc and /sys files etc.
    397                  when they're a multiple of PAGE_SIZE.  In the common case
    398                  for files with st_size not a multiple of PAGE_SIZE,
    399                  it's more efficient and accurate to use st_size.
    400 
    401                  Be careful here.  The current position may actually be
    402                  beyond the end of the file.  As in the example above.  */
    403 
    404               bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
    405               if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
    406                 skip_read = true;
    407               else
    408                 bytes = 0;
    409             }
    410           else
    411             {
    412               off_t hi_pos = (end_pos
    413                               - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
    414               if (0 <= current_pos && current_pos < hi_pos
    415                   && 0 <= lseek (fd, hi_pos, SEEK_CUR))
    416                 bytes = hi_pos - current_pos;
    417             }
    418         }
    419 
    420       if (! skip_read)
    421         {
    422           fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
    423           while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
    424             {
    425               if (bytes_read == SAFE_READ_ERROR)
    426                 {
    427                   error (0, errno, "%s", quotef (file));
    428                   ok = false;
    429                   break;
    430                 }
    431               bytes += bytes_read;
    432             }
    433         }
    434     }
    435   else if (!count_chars && !count_complicated)
    436     {
    437 #ifdef USE_AVX2_WC_LINECOUNT
    438       static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
    439       if (!wc_lines_p)
    440         wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
    441 #else
    442       bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
    443         = wc_lines;
    444 #endif
    445 
    446       /* Use a separate loop when counting only lines or lines and bytes --
    447          but not chars or words.  */
    448       ok = wc_lines_p (file, fd, &lines, &bytes);
    449     }
    450 #if MB_LEN_MAX > 1
    451 # define SUPPORT_OLD_MBRTOWC 1
    452   else if (MB_CUR_MAX > 1)
    453     {
    454       bool in_word = false;
    455       uintmax_t linepos = 0;
    456       mbstate_t state = {0};
    457       bool in_shift = false;
    458 # if SUPPORT_OLD_MBRTOWC
    459       /* Back-up the state before each multibyte character conversion and
    460          move the last incomplete character of the buffer to the front
    461          of the buffer.  This is needed because we don't know whether
    462          the 'mbrtowc' function updates the state when it returns -2, --
    463          this is the ISO C 99 and glibc-2.2 behavior - or not - amended
    464          ANSI C, glibc-2.1 and Solaris 5.7 behavior.  We don't have an
    465          autoconf test for this, yet.  */
    466       size_t prev = 0; /* number of bytes carried over from previous round */
    467 # else
    468       const size_t prev = 0;
    469 # endif
    470 
    471       while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
    472         {
    473           char const *p;
    474 # if SUPPORT_OLD_MBRTOWC
    475           mbstate_t backup_state;
    476 # endif
    477           if (bytes_read == SAFE_READ_ERROR)
    478             {
    479               error (0, errno, "%s", quotef (file));
    480               ok = false;
    481               break;
    482             }
    483 
    484           bytes += bytes_read;
    485           p = buf;
    486           bytes_read += prev;
    487           do
    488             {
    489               wchar_t wide_char;
    490               size_t n;
    491               bool wide = true;
    492 
    493               if (!in_shift && is_basic (*p))
    494                 {
    495                   /* Handle most ASCII characters quickly, without calling
    496                      mbrtowc().  */
    497                   n = 1;
    498                   wide_char = *p;
    499                   wide = false;
    500                 }
    501               else
    502                 {
    503                   in_shift = true;
    504 # if SUPPORT_OLD_MBRTOWC
    505                   backup_state = state;
    506 # endif
    507                   n = mbrtowc (&wide_char, p, bytes_read, &state);
    508                   if (n == (size_t) -2)
    509                     {
    510 # if SUPPORT_OLD_MBRTOWC
    511                       state = backup_state;
    512 # endif
    513                       break;
    514                     }
    515                   if (n == (size_t) -1)
    516                     {
    517                       /* Remember that we read a byte, but don't complain
    518                          about the error.  Because of the decoding error,
    519                          this is a considered to be byte but not a
    520                          character (that is, chars is not incremented).  */
    521                       p++;
    522                       bytes_read--;
    523                       continue;
    524                     }
    525                   if (mbsinit (&state))
    526                     in_shift = false;
    527                   if (n == 0)
    528                     {
    529                       wide_char = 0;
    530                       n = 1;
    531                     }
    532                 }
    533 
    534               switch (wide_char)
    535                 {
    536                 case '\n':
    537                   lines++;
    538                   FALLTHROUGH;
    539                 case '\r':
    540                 case '\f':
    541                   if (linepos > linelength)
    542                     linelength = linepos;
    543                   linepos = 0;
    544                   goto mb_word_separator;
    545                 case '\t':
    546                   linepos += 8 - (linepos % 8);
    547                   goto mb_word_separator;
    548                 case ' ':
    549                   linepos++;
    550                   FALLTHROUGH;
    551                 case '\v':
    552                 mb_word_separator:
    553                   words += in_word;
    554                   in_word = false;
    555                   break;
    556                 default:
    557                   if (wide && iswprint (wide_char))
    558                     {
    559                       /* wcwidth can be expensive on OSX for example,
    560                          so avoid if not needed.  */
    561                       if (print_linelength)
    562                         {
    563                           int width = wcwidth (wide_char);
    564                           if (width > 0)
    565                             linepos += width;
    566                         }
    567                       if (iswspace (wide_char) || iswnbspace (wide_char))
    568                         goto mb_word_separator;
    569                       in_word = true;
    570                     }
    571                   else if (!wide && isprint (to_uchar (*p)))
    572                     {
    573                       linepos++;
    574                       if (isspace (to_uchar (*p)))
    575                         goto mb_word_separator;
    576                       in_word = true;
    577                     }
    578                   break;
    579                 }
    580 
    581               p += n;
    582               bytes_read -= n;
    583               chars++;
    584             }
    585           while (bytes_read > 0);
    586 
    587 # if SUPPORT_OLD_MBRTOWC
    588           if (bytes_read > 0)
    589             {
    590               if (bytes_read == BUFFER_SIZE)
    591                 {
    592                   /* Encountered a very long redundant shift sequence.  */
    593                   p++;
    594                   bytes_read--;
    595                 }
    596               memmove (buf, p, bytes_read);
    597             }
    598           prev = bytes_read;
    599 # endif
    600         }
    601       if (linepos > linelength)
    602         linelength = linepos;
    603       words += in_word;
    604     }
    605 #endif
    606   else
    607     {
    608       bool in_word = false;
    609       uintmax_t linepos = 0;
    610 
    611       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
    612         {
    613           char const *p = buf;
    614           if (bytes_read == SAFE_READ_ERROR)
    615             {
    616               error (0, errno, "%s", quotef (file));
    617               ok = false;
    618               break;
    619             }
    620 
    621           bytes += bytes_read;
    622           do
    623             {
    624               switch (*p++)
    625                 {
    626                 case '\n':
    627                   lines++;
    628                   FALLTHROUGH;
    629                 case '\r':
    630                 case '\f':
    631                   if (linepos > linelength)
    632                     linelength = linepos;
    633                   linepos = 0;
    634                   goto word_separator;
    635                 case '\t':
    636                   linepos += 8 - (linepos % 8);
    637                   goto word_separator;
    638                 case ' ':
    639                   linepos++;
    640                   FALLTHROUGH;
    641                 case '\v':
    642                 word_separator:
    643                   words += in_word;
    644                   in_word = false;
    645                   break;
    646                 default:
    647                   if (isprint (to_uchar (p[-1])))
    648                     {
    649                       linepos++;
    650                       if (isspace (to_uchar (p[-1]))
    651                           || isnbspace (to_uchar (p[-1])))
    652                         goto word_separator;
    653                       in_word = true;
    654                     }
    655                   break;
    656                 }
    657             }
    658           while (--bytes_read);
    659         }
    660       if (linepos > linelength)
    661         linelength = linepos;
    662       words += in_word;
    663     }
    664 
    665   if (count_chars < print_chars)
    666     chars = bytes;
    667 
    668   if (total_mode != total_only)
    669     write_counts (lines, words, chars, bytes, linelength, file_x);
    670 
    671   if (ckd_add (&total_lines, total_lines, lines))
    672     total_lines_overflow = true;
    673   if (ckd_add (&total_words, total_words, words))
    674     total_words_overflow = true;
    675   if (ckd_add (&total_chars, total_chars, chars))
    676     total_chars_overflow = true;
    677   if (ckd_add (&total_bytes, total_bytes, bytes))
    678     total_bytes_overflow = true;
    679 
    680   if (linelength > max_line_length)
    681     max_line_length = linelength;
    682 
    683   return ok;
    684 }
    685 
    686 static bool
    687 wc_file (char const *file, struct fstatus *fstatus)
    688 {
    689   if (! file || STREQ (file, "-"))
    690     {
    691       have_read_stdin = true;
    692       xset_binary_mode (STDIN_FILENO, O_BINARY);
    693       return wc (STDIN_FILENO, file, fstatus, -1);
    694     }
    695   else
    696     {
    697       int fd = open (file, O_RDONLY | O_BINARY);
    698       if (fd == -1)
    699         {
    700           error (0, errno, "%s", quotef (file));
    701           return false;
    702         }
    703       else
    704         {
    705           bool ok = wc (fd, file, fstatus, 0);
    706           if (close (fd) != 0)
    707             {
    708               error (0, errno, "%s", quotef (file));
    709               return false;
    710             }
    711           return ok;
    712         }
    713     }
    714 }
    715 
    716 /* Return the file status for the NFILES files addressed by FILE.
    717    Optimize the case where only one number is printed, for just one
    718    file; in that case we can use a print width of 1, so we don't need
    719    to stat the file.  Handle the case of (nfiles == 0) in the same way;
    720    that happens when we don't know how long the list of file names will be.  */
    721 
    722 static struct fstatus *
    723 get_input_fstatus (size_t nfiles, char *const *file)
    724 {
    725   struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
    726 
    727   if (nfiles == 0
    728       || (nfiles == 1
    729           && ((print_lines + print_words + print_chars
    730                + print_bytes + print_linelength)
    731               == 1)))
    732     fstatus[0].failed = 1;
    733   else
    734     {
    735       for (size_t i = 0; i < nfiles; i++)
    736         fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
    737                              ? fstat (STDIN_FILENO, &fstatus[i].st)
    738                              : stat (file[i], &fstatus[i].st));
    739     }
    740 
    741   return fstatus;
    742 }
    743 
    744 /* Return a print width suitable for the NFILES files whose status is
    745    recorded in FSTATUS.  Optimize the same special case that
    746    get_input_fstatus optimizes.  */
    747 
    748 ATTRIBUTE_PURE
    749 static int
    750 compute_number_width (size_t nfiles, struct fstatus const *fstatus)
    751 {
    752   int width = 1;
    753 
    754   if (0 < nfiles && fstatus[0].failed <= 0)
    755     {
    756       int minimum_width = 1;
    757       uintmax_t regular_total = 0;
    758 
    759       for (size_t i = 0; i < nfiles; i++)
    760         if (! fstatus[i].failed)
    761           {
    762             if (S_ISREG (fstatus[i].st.st_mode))
    763               regular_total += fstatus[i].st.st_size;
    764             else
    765               minimum_width = 7;
    766           }
    767 
    768       for (; 10 <= regular_total; regular_total /= 10)
    769         width++;
    770       if (width < minimum_width)
    771         width = minimum_width;
    772     }
    773 
    774   return width;
    775 }
    776 
    777 
    778 int
    779 main (int argc, char **argv)
    780 {
    781   bool ok;
    782   int optc;
    783   size_t nfiles;
    784   char **files;
    785   char *files_from = nullptr;
    786   struct fstatus *fstatus;
    787   struct Tokens tok;
    788 
    789   initialize_main (&argc, &argv);
    790   set_program_name (argv[0]);
    791   setlocale (LC_ALL, "");
    792   bindtextdomain (PACKAGE, LOCALEDIR);
    793   textdomain (PACKAGE);
    794 
    795   atexit (close_stdout);
    796 
    797   page_size = getpagesize ();
    798   /* Line buffer stdout to ensure lines are written atomically and immediately
    799      so that processes running in parallel do not intersperse their output.  */
    800   setvbuf (stdout, nullptr, _IOLBF, 0);
    801 
    802   posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
    803 
    804   print_lines = print_words = print_chars = print_bytes = false;
    805   print_linelength = false;
    806   total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
    807 
    808   while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
    809     switch (optc)
    810       {
    811       case 'c':
    812         print_bytes = true;
    813         break;
    814 
    815       case 'm':
    816         print_chars = true;
    817         break;
    818 
    819       case 'l':
    820         print_lines = true;
    821         break;
    822 
    823       case 'w':
    824         print_words = true;
    825         break;
    826 
    827       case 'L':
    828         print_linelength = true;
    829         break;
    830 
    831       case DEBUG_PROGRAM_OPTION:
    832         debug = true;
    833         break;
    834 
    835       case FILES0_FROM_OPTION:
    836         files_from = optarg;
    837         break;
    838 
    839       case TOTAL_OPTION:
    840         total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
    841         break;
    842 
    843       case_GETOPT_HELP_CHAR;
    844 
    845       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
    846 
    847       default:
    848         usage (EXIT_FAILURE);
    849       }
    850 
    851   if (! (print_lines || print_words || print_chars || print_bytes
    852          || print_linelength))
    853     print_lines = print_words = print_bytes = true;
    854 
    855   bool read_tokens = false;
    856   struct argv_iterator *ai;
    857   if (files_from)
    858     {
    859       FILE *stream;
    860 
    861       /* When using --files0-from=F, you may not specify any files
    862          on the command-line.  */
    863       if (optind < argc)
    864         {
    865           error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
    866           fprintf (stderr, "%s\n",
    867                    _("file operands cannot be combined with --files0-from"));
    868           usage (EXIT_FAILURE);
    869         }
    870 
    871       if (STREQ (files_from, "-"))
    872         stream = stdin;
    873       else
    874         {
    875           stream = fopen (files_from, "r");
    876           if (stream == nullptr)
    877             error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
    878                    quoteaf (files_from));
    879         }
    880 
    881       /* Read the file list into RAM if we can detect its size and that
    882          size is reasonable.  Otherwise, we'll read a name at a time.  */
    883       struct stat st;
    884       if (fstat (fileno (stream), &st) == 0
    885           && S_ISREG (st.st_mode)
    886           && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
    887         {
    888           read_tokens = true;
    889           readtokens0_init (&tok);
    890           if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
    891             error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
    892                    quoteaf (files_from));
    893           files = tok.tok;
    894           nfiles = tok.n_tok;
    895           ai = argv_iter_init_argv (files);
    896         }
    897       else
    898         {
    899           files = nullptr;
    900           nfiles = 0;
    901           ai = argv_iter_init_stream (stream);
    902         }
    903     }
    904   else
    905     {
    906       static char *stdin_only[] = { nullptr };
    907       files = (optind < argc ? argv + optind : stdin_only);
    908       nfiles = (optind < argc ? argc - optind : 1);
    909       ai = argv_iter_init_argv (files);
    910     }
    911 
    912   if (!ai)
    913     xalloc_die ();
    914 
    915   fstatus = get_input_fstatus (nfiles, files);
    916   if (total_mode == total_only)
    917     number_width = 1;  /* No extra padding, since no alignment requirement.  */
    918   else
    919     number_width = compute_number_width (nfiles, fstatus);
    920 
    921   ok = true;
    922   for (int i = 0; /* */; i++)
    923     {
    924       bool skip_file = false;
    925       enum argv_iter_err ai_err;
    926       char *file_name = argv_iter (ai, &ai_err);
    927       if (!file_name)
    928         {
    929           switch (ai_err)
    930             {
    931             case AI_ERR_EOF:
    932               goto argv_iter_done;
    933             case AI_ERR_READ:
    934               error (0, errno, _("%s: read error"),
    935                      quotef (files_from));
    936               ok = false;
    937               goto argv_iter_done;
    938             case AI_ERR_MEM:
    939               xalloc_die ();
    940             default:
    941               affirm (!"unexpected error code from argv_iter");
    942             }
    943         }
    944       if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
    945         {
    946           /* Give a better diagnostic in an unusual case:
    947              printf - | wc --files0-from=- */
    948           error (0, 0, _("when reading file names from stdin, "
    949                          "no file name of %s allowed"),
    950                  quoteaf (file_name));
    951           skip_file = true;
    952         }
    953 
    954       if (!file_name[0])
    955         {
    956           /* Diagnose a zero-length file name.  When it's one
    957              among many, knowing the record number may help.
    958              FIXME: currently print the record number only with
    959              --files0-from=FILE.  Maybe do it for argv, too?  */
    960           if (files_from == nullptr)
    961             error (0, 0, "%s", _("invalid zero-length file name"));
    962           else
    963             {
    964               /* Using the standard 'filename:line-number:' prefix here is
    965                  not totally appropriate, since NUL is the separator, not NL,
    966                  but it might be better than nothing.  */
    967               unsigned long int file_number = argv_iter_n_args (ai);
    968               error (0, 0, "%s:%lu: %s", quotef (files_from),
    969                      file_number, _("invalid zero-length file name"));
    970             }
    971           skip_file = true;
    972         }
    973 
    974       if (skip_file)
    975         ok = false;
    976       else
    977         ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
    978 
    979       if (! nfiles)
    980         fstatus[0].failed = 1;
    981     }
    982  argv_iter_done:
    983 
    984   /* No arguments on the command line is fine: that means read from stdin.
    985      However, no arguments on the --files0-from input stream simply
    986      means don't read anything.  */
    987   if (ok && !files_from && argv_iter_n_args (ai) == 0)
    988     ok &= wc_file (nullptr, &fstatus[0]);
    989 
    990   if (read_tokens)
    991     readtokens0_free (&tok);
    992 
    993   if (total_mode != total_never
    994       && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
    995     {
    996       if (total_lines_overflow)
    997         {
    998           total_lines = UINTMAX_MAX;
    999           error (0, EOVERFLOW, _("total lines"));
   1000           ok = false;
   1001         }
   1002       if (total_words_overflow)
   1003         {
   1004           total_words = UINTMAX_MAX;
   1005           error (0, EOVERFLOW, _("total words"));
   1006           ok = false;
   1007         }
   1008       if (total_chars_overflow)
   1009         {
   1010           total_chars = UINTMAX_MAX;
   1011           error (0, EOVERFLOW, _("total characters"));
   1012           ok = false;
   1013         }
   1014       if (total_bytes_overflow)
   1015         {
   1016           total_bytes = UINTMAX_MAX;
   1017           error (0, EOVERFLOW, _("total bytes"));
   1018           ok = false;
   1019         }
   1020 
   1021       write_counts (total_lines, total_words, total_chars, total_bytes,
   1022                     max_line_length,
   1023                     total_mode != total_only ? _("total") : nullptr);
   1024     }
   1025 
   1026   argv_iter_free (ai);
   1027 
   1028   free (fstatus);
   1029 
   1030   if (have_read_stdin && close (STDIN_FILENO) != 0)
   1031     error (EXIT_FAILURE, errno, "-");
   1032 
   1033   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
   1034 }