wc

Count words in <50 lines of C
Log | Files | Refs | README

busybox-wc_comments_stripped.c (3369B)


      1 #include "libbb.h"
      2 #include "unicode.h"
      3 
      4 /* Copyright (C) 2003  Manuel Novoa III  <mjn3@codepoet.org>
      5  *
      6  * Licensed under GPLv2 or later, see file LICENSE in this source tree. */
      7 #if !ENABLE_LOCALE_SUPPORT /* replace the ctype classifiers with fixed ASCII-range tests */
      8 # undef isspace
      9 # undef isprint
     10 # define isspace(c) (' ' == (c)) /* only the space character counts as blank here */
     11 # define isprint(c) ((unsigned)((c) - 0x20) < 0x5fu) /* true for 0x20..0x7e; c is evaluated once */
     12 #endif
     13 
     14 #if !ENABLE_FEATURE_WC_LARGE /* default build: counters are plain unsigned */
     15 # define COUNT_FMT "u"
     16 # define COUNT_T unsigned
     17 #else /* ENABLE_FEATURE_WC_LARGE: counters are unsigned long long */
     18 # define COUNT_FMT "llu"
     19 # define COUNT_T unsigned long long
     20 #endif
     21 
     22 enum { /* indexes into the counts[]/totals[] arrays; wc_main also uses them as bit positions in print_type */
     23 	WC_LINES,    /* 0: -l */
     24 	WC_WORDS,    /* 1: -w */
     25 	WC_UNICHARS, /* 2: -m */
     26 	WC_BYTES,    /* 3: -c */
     27 	WC_LENGTH,   /* 4: -L */
     28 	NUM_WCS      /* 5: number of counters */
     29 };
     30 
     31 int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; /* wc applet entry point: prints the selected counters for each operand */
     32 int wc_main(int argc UNUSED_PARAM, char **argv) /* argv: -l/-w/-m/-c/-L options followed by file operands; argc is unused */
     33 {
     34 	const char *arg; /* name of the operand currently being processed */
     35 	const char *start_fmt = " %9"COUNT_FMT + 1; /* "+ 1" skips the leading space, so the first column has no separator */
     36 	const char *fname_fmt = " %s\n"; /* how the operand name is printed after the counters */
     37 	COUNT_T *pcounts; /* array printed by the OUTPUT block: counts, later totals */
     38 	COUNT_T counts[NUM_WCS]; /* counters for the current file */
     39 	COUNT_T totals[NUM_WCS]; /* accumulated across all files */
     40 	int num_files; /* operands visited, including ones that failed to open */
     41 	smallint status = EXIT_SUCCESS; /* becomes EXIT_FAILURE on any open or read error */
     42 	unsigned print_type; /* bitmask: bit (1 << WC_x) selects counter WC_x for printing */
     43 
     44 	init_unicode();
     45 
     46 	print_type = getopt32(argv, "lwmcL"); /* NOTE(review): assumes one result bit per option letter, in string order (matching the WC_* enum) - confirm in libbb */
     47 	/* No option given: default to lines, words and bytes. */
     48 	if (print_type == 0) {
     49 		print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES);
     50 	}
     51 
     52 	argv += optind; /* advance to the first non-option argument */
     53 	if (!argv[0]) { /* no file operands */
     54 		*--argv = (char *) bb_msg_standard_input; /* store a stand-in name in the slot just before argv[0]; presumably fopen_or_warn_stdin() maps it to stdin - confirm in libbb */
     55 		fname_fmt = "\n"; /* in this case no name follows the counters */
     56 	}
     57 	if (!argv[1]) { /* zero or one filename? */
     58 		if (!((print_type-1) & print_type)) /* exactly one option? */
     59 			start_fmt = "%"COUNT_FMT; /* print the bare number without padding */
     60 	}
     61 
     62 	memset(totals, 0, sizeof(totals));
     63 
     64 	pcounts = counts;
     65 
     66 	num_files = 0;
     67 	while ((arg = *argv++) != NULL) {
     68 		FILE *fp;
     69 		const char *s; /* format used for the next printed column */
     70 		unsigned u; /* counter index while printing */
     71 		unsigned linepos; /* display column reached on the current line */
     72 		smallint in_word; /* 1 while inside a word that has not been counted yet */
     73 
     74 		++num_files; /* incremented before the open, so a failed operand still counts toward the "total" line */
     75 		fp = fopen_or_warn_stdin(arg);
     76 		if (!fp) {
     77 			status = EXIT_FAILURE;
     78 			continue; /* no counters are printed for this operand */
     79 		}
     80 
     81 		memset(counts, 0, sizeof(counts));
     82 		linepos = 0;
     83 		in_word = 0;
     84 
     85 		while (1) {
     86 			int c;
     87 
     88 			c = getc(fp);
     89 			if (c == EOF) {
     90 				if (ferror(fp)) {
     91 					bb_simple_perror_msg(arg);
     92 					status = EXIT_FAILURE;
     93 				}
     94 				goto DO_EOF;  /* Treat an EOF as '\r'. */
     95 			}
     96 
     97 			++counts[WC_BYTES];
     98 			if (unicode_status != UNICODE_ON /* every byte is a new char */
     99 			 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
    100 			) {
    101 				++counts[WC_UNICHARS];
    102 			}
    103 
    104 			if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
    105 				++linepos;
    106 				if (!isspace(c)) {
    107 					in_word = 1;
    108 					continue; /* non-space printable: still inside a word, nothing to finalize */
    109 				}
    110 			} else if ((unsigned)(c - 9) <= 4) { /* c is 9..13: '\t', '\n', '\v', '\f' or '\r' */
    111 				if (c == '\t') {
    112 					linepos = (linepos | 7) + 1; /* advance to the next multiple of 8 */
    113 				} else {  /* '\n', '\r', '\f', or '\v' */
    114  DO_EOF:
    115 					if (linepos > counts[WC_LENGTH]) {
    116 						counts[WC_LENGTH] = linepos;
    117 					}
    118 					if (c == '\n') {
    119 						++counts[WC_LINES];
    120 					}
    121 					if (c != '\v') { /* '\v' keeps the current column */
    122 						linepos = 0;
    123 					}
    124 				}
    125 			} else {
    126 				continue; /* any other byte changes neither the column nor the word state */
    127 			}
    128 			/* Reached only on whitespace or EOF: count the word that just ended, if any. */
    129 			counts[WC_WORDS] += in_word;
    130 			in_word = 0;
    131 			if (c == EOF) {
    132 				break;
    133 			}
    134 		}
    135 
    136 		fclose_if_not_stdin(fp);
    137 
    138 		if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
    139 			totals[WC_LENGTH] = counts[WC_LENGTH];
    140 		}
    141 		totals[WC_LENGTH] -= counts[WC_LENGTH]; /* pre-subtract so the unconditional "totals[u] += pcounts[u]" below leaves the maximum rather than a sum */
    142 
    143  OUTPUT: /* also entered by goto from below the loop, to print the "total" line */
    144 		s = start_fmt;
    145 		u = 0;
    146 		do {
    147 			if (print_type & (1 << u)) {
    148 				printf(s, pcounts[u]);
    149 				s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
    150 			}
    151 			totals[u] += pcounts[u]; /* on the "total" pass pcounts == totals, so this doubles values that were already printed */
    152 		} while (++u < NUM_WCS);
    153 		printf(fname_fmt, arg);
    154 	}
    155 	/* More than one operand visited: print a "total" line by re-running the OUTPUT block once. */
    156 	if (num_files > 1) {
    157 		num_files = 0;  /* Make sure we don't get here again. */
    158 		arg = "total";
    159 		pcounts = totals;
    160 		--argv; /* step back onto the terminating NULL so the loop condition ends the loop after the "total" line */
    161 		goto OUTPUT;
    162 	}
    163 
    164 	fflush_stdout_and_exit(status); /* presumably does not return (no return statement follows) - confirm in libbb */
    165 }