busybox-wc_comments_stripped.c (3369B)
1 #include "libbb.h" 2 #include "unicode.h" 3 4 /* Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> 5 /* 6 /* Licensed under GPLv2 or later, see file LICENSE in this source tree. 7 #if !ENABLE_LOCALE_SUPPORT 8 # undef isprint 9 # undef isspace 10 # define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20)) 11 # define isspace(c) ((c) == ' ') 12 #endif 13 14 #if ENABLE_FEATURE_WC_LARGE 15 # define COUNT_T unsigned long long 16 # define COUNT_FMT "llu" 17 #else 18 # define COUNT_T unsigned 19 # define COUNT_FMT "u" 20 #endif 21 22 enum { 23 WC_LINES = 0, /* -l */ 24 WC_WORDS = 1, /* -w */ 25 WC_UNICHARS = 2, /* -m */ 26 WC_BYTES = 3, /* -c */ 27 WC_LENGTH = 4, /* -L */ 28 NUM_WCS = 5, 29 }; 30 31 int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 32 int wc_main(int argc UNUSED_PARAM, char **argv) 33 { 34 const char *arg; 35 const char *start_fmt = " %9"COUNT_FMT + 1; 36 const char *fname_fmt = " %s\n"; 37 COUNT_T *pcounts; 38 COUNT_T counts[NUM_WCS]; 39 COUNT_T totals[NUM_WCS]; 40 int num_files; 41 smallint status = EXIT_SUCCESS; 42 unsigned print_type; 43 44 init_unicode(); 45 46 print_type = getopt32(argv, "lwmcL"); 47 48 if (print_type == 0) { 49 print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES); 50 } 51 52 argv += optind; 53 if (!argv[0]) { 54 *--argv = (char *) bb_msg_standard_input; 55 fname_fmt = "\n"; 56 } 57 if (!argv[1]) { /* zero or one filename? */ 58 if (!((print_type-1) & print_type)) /* exactly one option? */ 59 start_fmt = "%"COUNT_FMT; 60 } 61 62 memset(totals, 0, sizeof(totals)); 63 64 pcounts = counts; 65 66 num_files = 0; 67 while ((arg = *argv++) != NULL) { 68 FILE *fp; 69 const char *s; 70 unsigned u; 71 unsigned linepos; 72 smallint in_word; 73 74 ++num_files; 75 fp = fopen_or_warn_stdin(arg); 76 if (!fp) { 77 status = EXIT_FAILURE; 78 continue; 79 } 80 81 memset(counts, 0, sizeof(counts)); 82 linepos = 0; 83 in_word = 0; 84 85 while (1) { 86 int c; 87 88 c = getc(fp); 89 if (c == EOF) { 90 if (ferror(fp)) { 91 bb_simple_perror_msg(arg); 92 status = EXIT_FAILURE; 93 } 94 goto DO_EOF; /* Treat an EOF as '\r'. */ 95 } 96 97 ++counts[WC_BYTES]; 98 if (unicode_status != UNICODE_ON /* every byte is a new char */ 99 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */ 100 ) { 101 ++counts[WC_UNICHARS]; 102 } 103 104 if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ 105 ++linepos; 106 if (!isspace(c)) { 107 in_word = 1; 108 continue; 109 } 110 } else if ((unsigned)(c - 9) <= 4) { 111 if (c == '\t') { 112 linepos = (linepos | 7) + 1; 113 } else { /* '\n', '\r', '\f', or '\v' */ 114 DO_EOF: 115 if (linepos > counts[WC_LENGTH]) { 116 counts[WC_LENGTH] = linepos; 117 } 118 if (c == '\n') { 119 ++counts[WC_LINES]; 120 } 121 if (c != '\v') { 122 linepos = 0; 123 } 124 } 125 } else { 126 continue; 127 } 128 129 counts[WC_WORDS] += in_word; 130 in_word = 0; 131 if (c == EOF) { 132 break; 133 } 134 } 135 136 fclose_if_not_stdin(fp); 137 138 if (totals[WC_LENGTH] < counts[WC_LENGTH]) { 139 totals[WC_LENGTH] = counts[WC_LENGTH]; 140 } 141 totals[WC_LENGTH] -= counts[WC_LENGTH]; 142 143 OUTPUT: 144 s = start_fmt; 145 u = 0; 146 do { 147 if (print_type & (1 << u)) { 148 printf(s, pcounts[u]); 149 s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ 150 } 151 totals[u] += pcounts[u]; 152 } while (++u < NUM_WCS); 153 printf(fname_fmt, arg); 154 } 155 156 if (num_files > 1) { 157 num_files = 0; /* Make sure we don't get here again. */ 158 arg = "total"; 159 pcounts = totals; 160 --argv; 161 goto OUTPUT; 162 } 163 164 fflush_stdout_and_exit(status); 165 }