| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | #include <config.h> |
| |
|
| | #include <stdio.h> |
| | #include <getopt.h> |
| | #include <sys/types.h> |
| | #include "system.h" |
| |
|
| | #include "assure.h" |
| | #include "fadvise.h" |
| | #include "getndelim2.h" |
| |
|
| | #include "set-fields.h" |
| |
|
/* Name under which this program reports itself (passed to
   emit_ancillary_info and the version handler).  */
#define PROGRAM_NAME "cut"

/* Author list handed to the --version handler.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")

/* Report MESSAGE as a diagnostic and terminate through usage (EXIT_FAILURE).
   MESSAGE is printed via an explicit "%s" so that it is always treated as
   plain text: a '%' in a message (or in its translation) can never be
   interpreted as a conversion specification.  For messages without '%' the
   output is identical to passing MESSAGE as the format.  */
#define FATAL_ERROR(Message)						\
  do									\
    {									\
      error (0, 0, "%s", (Message));					\
      usage (EXIT_FAILURE);						\
    }									\
  while (0)
| |
|
| |
|
| | /* Cursor into the sequence of range pairs that starts at frp; every cut_* routine */ |
| | /* resets it to frp at the start of a line.  print_kth and is_range_start_index */ |
| | /* compare an index against current_rp->lo, and next_item steps the cursor to the */ |
| | /* following pair once the index has grown past current_rp->hi. */ |
| | static struct field_range_pair *current_rp; |
| |
|
| | |
| | |
| | /* Buffer that cut_fields hands to getndelim2 to receive the first field of a line; */ |
| | /* when the field is delimited, its last byte is the delimiter itself. */ |
| | /* It is used only on the path where buffer_first_field */ |
| | /* (suppress_non_delimited ^ !print_kth (1)) is true.  When getndelim2 reports */ |
| | /* failure the buffer is freed and this pointer is reset to nullptr. */ |
| | static char *field_1_buffer; |
| |
|
| | /* Size variable passed by address to getndelim2 together with field_1_buffer. */ |
| | static size_t field_1_bufsize; |
| |
|
| | /* Set by -s (--only-delimited).  When true, cut_fields drops a line whose first */ |
| | /* field is not terminated by delim instead of echoing the whole line. */ |
| | /* main rejects the option when -b or -c selected byte mode. */ |
| | static bool suppress_non_delimited; |
| |
|
| | /* Set by --complement; main forwards it to set_fields as SETFLD_COMPLEMENT. */ |
| | /* Nothing else in the code shown here reads it. */ |
| | static bool complement; |
| |
|
| | /* Input field delimiter byte: the -d argument (an empty argument gives NUL), else TAB. */ |
| | static unsigned char delim; |
| |
|
| | /* Byte that ends each input and output line: newline by default, NUL after -z. */ |
| | static unsigned char line_delim = '\n'; |
| |
|
| | /* Number of bytes of output_delimiter_string written each time the delimiter is output. */ |
| | static size_t output_delimiter_length; |
| |
|
| | /* Bytes written between selected fields (cut_fields) or between byte ranges (cut_bytes). */ |
| | /* Points at output_delimiter_default unless --output-delimiter was given; cut_bytes tests that. */ |
| | static char *output_delimiter_string; |
| |
|
| | /* One-byte storage for the default output delimiter; main stores delim in it. */ |
| | static char output_delimiter_default[1]; |
| |
|
| | /* Set by cut_file when the operand - is processed; main then closes stdin before exiting. */ |
| | static bool have_read_stdin; |
| |
|
| | /* Values returned by getopt_long for the long options that have no short form. */ |
| | /* Numbering starts at CHAR_MAX + 1, i.e. above every value a char option can take. */ |
| | enum |
| | { |
| | OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, |
| | COMPLEMENT_OPTION |
| | }; |
| |
|
| | static struct option const longopts[] = /* long-option table passed to getopt_long in main */ |
| | { |
| | {"bytes", required_argument, nullptr, 'b'}, |
| | {"characters", required_argument, nullptr, 'c'}, |
| | {"fields", required_argument, nullptr, 'f'}, |
| | {"delimiter", required_argument, nullptr, 'd'}, |
| | {"only-delimited", no_argument, nullptr, 's'}, |
| | {"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION}, |
| | {"complement", no_argument, nullptr, COMPLEMENT_OPTION}, |
| | {"zero-terminated", no_argument, nullptr, 'z'}, |
| | {GETOPT_HELP_OPTION_DECL}, |
| | {GETOPT_VERSION_OPTION_DECL}, |
| | {nullptr, 0, nullptr, 0} /* all-zero entry terminates the table */ |
| | }; |
| |
|
| | void |
| | usage (int status) |
| | { /* Non-success STATUS: only emit_try_help () is called.  EXIT_SUCCESS: the full help text goes to stdout. */ |
| | if (status != EXIT_SUCCESS) |
| | emit_try_help (); |
| | else |
| | { |
| | printf (_("\ |
| | Usage: %s OPTION... [FILE]...\n\ |
| | "), |
| | program_name); |
| | fputs (_("\ |
| | Print selected parts of lines from each FILE to standard output.\n\ |
| | "), stdout); |
| |
|
| | emit_stdin_note (); |
| | emit_mandatory_arg_note (); |
| |
|
| | fputs (_("\ |
| | -b, --bytes=LIST select only these bytes\n\ |
| | -c, --characters=LIST select only these characters\n\ |
| | -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | -f, --fields=LIST select only these fields; also print any line\n\ |
| | that contains no delimiter character, unless\n\ |
| | the -s option is specified\n\ |
| | -n (ignored)\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | --complement complement the set of selected bytes, characters\n\ |
| | or fields\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | -s, --only-delimited do not print lines not containing delimiters\n\ |
| | --output-delimiter=STRING use STRING as the output delimiter\n\ |
| | the default is to use the input delimiter\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | -z, --zero-terminated line delimiter is NUL, not newline\n\ |
| | "), stdout); |
| | fputs (HELP_OPTION_DESCRIPTION, stdout); |
| | fputs (VERSION_OPTION_DESCRIPTION, stdout); |
| | fputs (_("\ |
| | \n\ |
| | Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ |
| | range, or many ranges separated by commas. Selected input is written\n\ |
| | in the same order that it is read, and is written exactly once.\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | Each range is one of:\n\ |
| | \n\ |
| | N N'th byte, character or field, counted from 1\n\ |
| | N- from N'th byte, character or field, to end of line\n\ |
| | N-M from N'th to M'th (included) byte, character or field\n\ |
| | -M from first to M'th (included) byte, character or field\n\ |
| | "), stdout); |
| | emit_ancillary_info (PROGRAM_NAME); |
| | } |
| | exit (status); /* reached on both paths, so usage never returns to its caller */ |
| | } |
| |
|
| |
|
| | |
| | |
| |
|
| | static inline void |
| | next_item (uintmax_t *item_idx) |
| | { |
| | (*item_idx)++; |
| | if ((*item_idx) > current_rp->hi) |
| | current_rp++; |
| | } |
| |
|
| | |
| |
|
| | static inline bool |
| | print_kth (uintmax_t k) |
| | { |
| | return current_rp->lo <= k; |
| | } |
| |
|
| | |
| |
|
| | static inline bool |
| | is_range_start_index (uintmax_t k) |
| | { |
| | return k == current_rp->lo; |
| | } |
| |
|
| | |
| |
|
| | static void |
| | cut_bytes (FILE *stream) |
| | { |
| | uintmax_t byte_idx; |
| | |
| | |
| | bool print_delimiter; |
| |
|
| | byte_idx = 0; |
| | print_delimiter = false; |
| | current_rp = frp; |
| | while (true) |
| | { |
| | int c; |
| |
|
| | c = getc (stream); |
| |
|
| | if (c == line_delim) |
| | { |
| | if (putchar (c) < 0) |
| | write_error (); |
| | byte_idx = 0; |
| | print_delimiter = false; |
| | current_rp = frp; |
| | } |
| | else if (c == EOF) |
| | { |
| | if (byte_idx > 0) |
| | { |
| | if (putchar (line_delim) < 0) |
| | write_error (); |
| | } |
| | break; |
| | } |
| | else |
| | { |
| | next_item (&byte_idx); |
| | if (print_kth (byte_idx)) |
| | { |
| | if (output_delimiter_string != output_delimiter_default) |
| | { |
| | if (print_delimiter && is_range_start_index (byte_idx)) |
| | { |
| | if (fwrite (output_delimiter_string, sizeof (char), |
| | output_delimiter_length, stdout) |
| | != output_delimiter_length) |
| | write_error (); |
| | } |
| | print_delimiter = true; |
| | } |
| |
|
| | if (putchar (c) < 0) |
| | write_error (); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | /* Copy the selected fields of every line read from STREAM to standard output. */ |
| |
|
| | static void |
| | cut_fields (FILE *stream) |
| | { |
| | int c; |
| | uintmax_t field_idx = 1; |
| | bool found_any_selected_field = false; |
| | bool buffer_first_field; |
| |
|
| | current_rp = frp; |
| |
|
| | c = getc (stream); |
| | if (c == EOF) |
| | return; |
| |
|
| | ungetc (c, stream); |
| | c = 0; |
| |
|
| | /* Field 1 is read into field_1_buffer only when that is needed to decide what to print: */ |
| | /* either -s is off and field 1 is not selected, or -s is on and field 1 is selected. */ |
| | /* In the other two combinations the streaming code below already gives the right result */ |
| | /* for a line without any delimiter: with -s off and field 1 selected the whole line is */ |
| | /* written as field 1; with -s on and field 1 not selected nothing is written, not even */ |
| | /* the line terminator (see the test on suppress_non_delimited && field_idx == 1). */ |
| | buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); |
| |
|
| | while (true) |
| | { |
| | if (field_idx == 1 && buffer_first_field) |
| | { |
| | ssize_t len; |
| | size_t n_bytes; |
| |
|
| | len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, |
| | GETNLINE_NO_LIMIT, delim, line_delim, stream); |
| | if (len < 0) |
| | { |
| | free (field_1_buffer); |
| | field_1_buffer = nullptr; |
| | if (ferror (stream) || feof (stream)) |
| | break; |
| | xalloc_die (); |
| | } |
| |
|
| | n_bytes = len; |
| | affirm (n_bytes != 0); |
| |
|
| | c = 0; |
| |
|
| | /* The buffered data does not end in delim, so this line contains no delimiter: */ |
| | /* echo it unchanged, or drop it entirely when -s is in effect, */ |
| | /* then go straight on to the next line. */ |
| | if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) |
| | { |
| | if (suppress_non_delimited) |
| | { |
| | /* Empty: -s drops lines that contain no delimiter. */ |
| | } |
| | else |
| | { |
| | if (fwrite (field_1_buffer, sizeof (char), n_bytes, stdout) |
| | != n_bytes) |
| | write_error (); |
| | /* Terminate the output line if the buffered data did not end with line_delim. */ |
| | if (field_1_buffer[n_bytes - 1] != line_delim) |
| | { |
| | if (putchar (line_delim) < 0) |
| | write_error (); |
| | } |
| | c = line_delim; |
| | } |
| | continue; |
| | } |
| |
|
| | if (print_kth (1)) |
| | { |
| | /* Write field 1 without its trailing delimiter byte. */ |
| | if (fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout) |
| | != n_bytes - 1) |
| | write_error (); |
| |
|
| | /* If delim equals line_delim, field 1 counts as followed by another field only when more input exists. */ |
| | if (delim == line_delim) |
| | { |
| | int last_c = getc (stream); |
| | if (last_c != EOF) |
| | { |
| | ungetc (last_c, stream); |
| | found_any_selected_field = true; |
| | } |
| | } |
| | else |
| | { |
| | found_any_selected_field = true; |
| | } |
| | } |
| | next_item (&field_idx); |
| | } |
| |
|
| | int prev_c = c; |
| |
|
| | if (print_kth (field_idx)) |
| | { |
| | if (found_any_selected_field) |
| | { |
| | if (fwrite (output_delimiter_string, sizeof (char), |
| | output_delimiter_length, stdout) |
| | != output_delimiter_length) |
| | write_error (); |
| | } |
| | found_any_selected_field = true; |
| |
|
| | while ((c = getc (stream)) != delim && c != line_delim && c != EOF) |
| | { |
| | if (putchar (c) < 0) |
| | write_error (); |
| | prev_c = c; |
| | } |
| | } |
| | else |
| | { |
| | while ((c = getc (stream)) != delim && c != line_delim && c != EOF) |
| | prev_c = c; |
| | } |
| |
|
| | /* If delim equals line_delim, a delimiter immediately followed by EOF is handled as EOF. */ |
| | if (delim == line_delim && c == delim) |
| | { |
| | int last_c = getc (stream); |
| | if (last_c != EOF) |
| | ungetc (last_c, stream); |
| | else |
| | c = last_c; |
| | } |
| |
|
| | if (c == delim) |
| | next_item (&field_idx); |
| | else if (c == line_delim || c == EOF) |
| | { |
| | if (found_any_selected_field |
| | || !(suppress_non_delimited && field_idx == 1)) |
| | { |
| | /* Write line_delim, except at EOF when prev_c already equals line_delim and delim differs from it. */ |
| | if (c == line_delim || prev_c != line_delim |
| | || delim == line_delim) |
| | { |
| | if (putchar (line_delim) < 0) |
| | write_error (); |
| | } |
| | } |
| | if (c == EOF) |
| | break; |
| |
|
| | /* A new line starts here: reset the per-line state. */ |
| | field_idx = 1; |
| | current_rp = frp; |
| | found_any_selected_field = false; |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| |
|
| | static bool |
| | cut_file (char const *file, void (*cut_stream) (FILE *)) |
| | { |
| | FILE *stream; |
| |
|
| | if (streq (file, "-")) |
| | { |
| | have_read_stdin = true; |
| | stream = stdin; |
| | assume (stream); |
| | } |
| | else |
| | { |
| | stream = fopen (file, "r"); |
| | if (stream == nullptr) |
| | { |
| | error (0, errno, "%s", quotef (file)); |
| | return false; |
| | } |
| | } |
| |
|
| | fadvise (stream, FADVISE_SEQUENTIAL); |
| |
|
| | cut_stream (stream); |
| |
|
| | int err = errno; |
| | if (!ferror (stream)) |
| | err = 0; |
| | if (streq (file, "-")) |
| | clearerr (stream); |
| | else if (fclose (stream) == EOF) |
| | err = errno; |
| | if (err) |
| | { |
| | error (0, err, "%s", quotef (file)); |
| | return false; |
| | } |
| | return true; |
| | } |
| |
|
| | int |
| | main (int argc, char **argv) |
| | { |
| | int optc; |
| | bool ok; |
| | bool delim_specified = false; |
| | bool byte_mode = false; |
| | char *spec_list_string = nullptr; |
| |
|
| | initialize_main (&argc, &argv); |
| | set_program_name (argv[0]); |
| | setlocale (LC_ALL, ""); |
| | bindtextdomain (PACKAGE, LOCALEDIR); |
| | textdomain (PACKAGE); |
| |
|
| | atexit (close_stdout); |
| |
|
| | |
| | suppress_non_delimited = false; |
| |
|
| | delim = '\0'; |
| | have_read_stdin = false; |
| |
|
| | while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, nullptr)) |
| | != -1) |
| | { |
| | switch (optc) |
| | { |
| | case 'b': |
| | case 'c': |
| | |
| | byte_mode = true; |
| | FALLTHROUGH; |
| | case 'f': |
| | |
| | if (spec_list_string) |
| | FATAL_ERROR (_("only one list may be specified")); |
| | spec_list_string = optarg; |
| | break; |
| |
|
| | case 'd': |
| | |
| | |
| | if (optarg[0] != '\0' && optarg[1] != '\0') |
| | FATAL_ERROR (_("the delimiter must be a single character")); |
| | delim = optarg[0]; |
| | delim_specified = true; |
| | break; |
| |
|
| | case OUTPUT_DELIMITER_OPTION: |
| | |
| | |
| | output_delimiter_length = (optarg[0] == '\0' |
| | ? 1 : strlen (optarg)); |
| | output_delimiter_string = optarg; |
| | break; |
| |
|
| | case 'n': |
| | break; |
| |
|
| | case 's': |
| | suppress_non_delimited = true; |
| | break; |
| |
|
| | case 'z': |
| | line_delim = '\0'; |
| | break; |
| |
|
| | case COMPLEMENT_OPTION: |
| | complement = true; |
| | break; |
| |
|
| | case_GETOPT_HELP_CHAR; |
| | case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); |
| | default: |
| | usage (EXIT_FAILURE); |
| | } |
| | } |
| |
|
| | if (!spec_list_string) |
| | FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); |
| |
|
| | if (byte_mode) |
| | { |
| | if (delim_specified) |
| | FATAL_ERROR (_("an input delimiter may be specified only\ |
| | when operating on fields")); |
| |
|
| | if (suppress_non_delimited) |
| | FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ |
| | \tonly when operating on fields")); |
| | } |
| |
|
| | set_fields (spec_list_string, |
| | ((byte_mode ? SETFLD_ERRMSG_USE_POS : 0) |
| | | (complement ? SETFLD_COMPLEMENT : 0))); |
| |
|
| | if (!delim_specified) |
| | delim = '\t'; |
| |
|
| | if (output_delimiter_string == nullptr) |
| | { |
| | output_delimiter_default[0] = delim; |
| | output_delimiter_string = output_delimiter_default; |
| | output_delimiter_length = 1; |
| | } |
| |
|
| | void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields; |
| | if (optind == argc) |
| | ok = cut_file ("-", cut_stream); |
| | else |
| | for (ok = true; optind < argc; optind++) |
| | ok &= cut_file (argv[optind], cut_stream); |
| |
|
| |
|
| | if (have_read_stdin && fclose (stdin) == EOF) |
| | { |
| | error (0, errno, "-"); |
| | ok = false; |
| | } |
| |
|
| | return ok ? EXIT_SUCCESS : EXIT_FAILURE; |
| | } |
| |
|