summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2016-11-17 21:41:39 (GMT)
committerPaul Eggert <eggert@cs.ucla.edu>2016-11-19 09:18:31 (GMT)
commit80c97aa06e8f0320ff397a74a018eadc6a21f5fa (patch)
tree1b5816ca1cc3799ea8f03c790d5b5fabff790233
parentc87daf6fdcaa116308663047c2e4fb8ff38011c7 (diff)
downloadgrep-80c97aa06e8f0320ff397a74a018eadc6a21f5fa.zip
grep-80c97aa06e8f0320ff397a74a018eadc6a21f5fa.tar.gz
grep-80c97aa06e8f0320ff397a74a018eadc6a21f5fa.tar.bz2
grep: scale back /dev/null speedup
The performance improvement when output is /dev/null (commit af6af288eac28951b5eee1eaaf373e22b2193b7b dated 2016-05-01) breaks scripts that run "PROGRAM | grep PATTERN >/dev/null" where PROGRAM dies when writing into a broken pipe. Suppress the improvement if standard input is not seekable. Problem reported by Gary Johnson (Bug#24941). * NEWS: Document this. * src/grep.c (seek_failed): New static var. (seek_data_failed): Move decl earlier, to be next to seek_failed. (file_must_have_nulls): Skip useless syscalls if seek_failed. Lessen source-code nesting. (reset): Set seek_failed and seek_data_failed. Try lseek even on non-regular files. (grep): New arg INEOF. All callers changed. Do not clear seek_data_failed here, since 'reset' now does this. (finalize_input): New static function. (grepdesc): Use it. (main): Do not exit on first match merely because output is /dev/null. * tests/grep-dev-null-out: Adjust to new behavior.
-rw-r--r--NEWS6
-rw-r--r--src/grep.c135
-rwxr-xr-xtests/grep-dev-null-out3
3 files changed, 91 insertions, 53 deletions
diff --git a/NEWS b/NEWS
index a95c875..a165377 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,12 @@ GNU grep NEWS -*- outline -*-
** Bug fixes
+ grep by default now reads all of standard input if it is a pipe,
+ even if this cannot affect grep's output or exit status. This works
+ better with nonportable scripts that run "PROGRAM | grep PATTERN
+ >/dev/null" where PROGRAM dies when writing into a broken pipe.
+ [bug introduced in grep-2.26]
+
grep -Pz no longer rejects patterns containing ^ and $, and is
more cautious about special patterns like (?-m) and (*FAIL).
[bug introduced in grep-2.23]
diff --git a/src/grep.c b/src/grep.c
index cafa0a2..5ce8f95 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -580,6 +580,10 @@ enum { SEEK_DATA = SEEK_SET };
enum { SEEK_HOLE = SEEK_SET };
#endif
+/* True if lseek with SEEK_CUR or SEEK_DATA failed on the current input. */
+static bool seek_failed;
+static bool seek_data_failed;
+
/* Functions we'll use to search. */
typedef void (*compile_fp_t) (char const *, size_t);
typedef size_t (*execute_fp_t) (char *, size_t, size_t *, char const *);
@@ -718,31 +722,26 @@ buf_has_nulls (char *buf, size_t size)
static bool
file_must_have_nulls (size_t size, int fd, struct stat const *st)
{
- if (usable_st_size (st))
+ /* If the file has holes, it must contain a null byte somewhere. */
+ if (SEEK_HOLE != SEEK_SET && !seek_failed
+ && usable_st_size (st) && size < st->st_size)
{
- if (st->st_size <= size)
- return false;
-
- /* If the file has holes, it must contain a null byte somewhere. */
- if (SEEK_HOLE != SEEK_SET)
+ off_t cur = size;
+ if (O_BINARY || fd == STDIN_FILENO)
{
- off_t cur = size;
- if (O_BINARY || fd == STDIN_FILENO)
- {
- cur = lseek (fd, 0, SEEK_CUR);
- if (cur < 0)
- return false;
- }
+ cur = lseek (fd, 0, SEEK_CUR);
+ if (cur < 0)
+ return false;
+ }
- /* Look for a hole after the current location. */
- off_t hole_start = lseek (fd, cur, SEEK_HOLE);
- if (0 <= hole_start)
- {
- if (lseek (fd, cur, SEEK_SET) < 0)
- suppressible_error (filename, errno);
- if (hole_start < st->st_size)
- return true;
- }
+ /* Look for a hole after the current location. */
+ off_t hole_start = lseek (fd, cur, SEEK_HOLE);
+ if (0 <= hole_start)
+ {
+ if (lseek (fd, cur, SEEK_SET) < 0)
+ suppressible_error (filename, errno);
+ if (hole_start < st->st_size)
+ return true;
}
}
@@ -806,13 +805,12 @@ static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */
static size_t pagesize; /* alignment of memory pages */
-static off_t bufoffset; /* Read offset; defined on regular files. */
+static off_t bufoffset; /* Read offset. */
static off_t after_last_match; /* Pointer after last matching line that
would have been output if we were
outputting characters. */
static bool skip_nuls; /* Skip '\0' in data. */
static bool skip_empty_lines; /* Skip empty lines in data. */
-static bool seek_data_failed; /* lseek with SEEK_DATA failed. */
static uintmax_t totalnl; /* Total newline count before lastnl. */
/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
@@ -851,20 +849,20 @@ reset (int fd, struct stat const *st)
bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
bufbeg[-1] = eolbyte;
bufdesc = fd;
+ bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
+ seek_failed = bufoffset < 0;
+
+ /* Assume SEEK_DATA fails if SEEK_CUR does. */
+ seek_data_failed = seek_failed;
- if (S_ISREG (st->st_mode))
+ if (seek_failed)
{
- if (fd != STDIN_FILENO)
- bufoffset = 0;
- else
+ if (errno != ESPIPE)
{
- bufoffset = lseek (fd, 0, SEEK_CUR);
- if (bufoffset < 0)
- {
- suppressible_error (filename, errno);
- return false;
- }
+ suppressible_error (filename, errno);
+ return false;
}
+ bufoffset = 0;
}
return true;
}
@@ -1477,9 +1475,10 @@ grepbuf (char *beg, char const *lim)
return outleft0 - outleft;
}
-/* Search a given (non-directory) file. Return a count of lines printed. */
+/* Search a given (non-directory) file. Return a count of lines printed.
+ Set *INEOF to true if end-of-file reached. */
static intmax_t
-grep (int fd, struct stat const *st)
+grep (int fd, struct stat const *st, bool *ineof)
{
intmax_t nlines, i;
size_t residue, save;
@@ -1507,7 +1506,6 @@ grep (int fd, struct stat const *st)
pending = 0;
skip_nuls = skip_empty_lines && !eol;
encoding_error_output = false;
- seek_data_failed = false;
nlines = 0;
residue = 0;
@@ -1542,7 +1540,10 @@ grep (int fd, struct stat const *st)
/* no more data to scan (eof) except for maybe a residue -> break */
if (beg == buflim)
- break;
+ {
+ *ineof = true;
+ break;
+ }
zap_nuls (beg, buflim, nul_zapper);
@@ -1742,11 +1743,46 @@ grepfile (int dirdesc, char const *name, bool follow, bool command_line)
return grepdesc (desc, command_line);
}
+/* Finish reading from FD, with status ST and where end-of-file has
+ been seen if INEOF. Typically this is a no-op, but when reading
+ from standard input this may adjust the file offset or drain a
+ pipe. */
+
+static void
+finalize_input (int fd, struct stat const *st, bool ineof)
+{
+ if (fd != STDIN_FILENO)
+ return;
+
+ if (outleft)
+ {
+ if (ineof)
+ return;
+ if (seek_failed)
+ {
+ while (fillbuf (0, st))
+ if (bufbeg == buflim)
+ return;
+ }
+ else if (0 <= lseek (fd, 0, SEEK_END))
+ return;
+ }
+ else
+ {
+ if (seek_failed || bufoffset == after_last_match
+ || 0 <= lseek (fd, after_last_match, SEEK_SET))
+ return;
+ }
+
+ suppressible_error (filename, errno);
+}
+
static bool
grepdesc (int desc, bool command_line)
{
intmax_t count;
bool status = true;
+ bool ineof = false;
struct stat st;
/* Get the file status, possibly for the second time. This catches
@@ -1839,7 +1875,7 @@ grepdesc (int desc, bool command_line)
if (O_BINARY && !isatty (desc))
set_binary_mode (desc, O_BINARY);
- count = grep (desc, &st);
+ count = grep (desc, &st, &ineof);
if (count_matches)
{
if (out_file)
@@ -1856,7 +1892,10 @@ grepdesc (int desc, bool command_line)
}
status = !count;
- if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
+
+ if (list_files == LISTFILES_NONE)
+ finalize_input (desc, &st, ineof);
+ else if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
{
print_filename ();
putchar_errno ('\n' & filename_mask);
@@ -1864,15 +1903,6 @@ grepdesc (int desc, bool command_line)
fflush_errno ();
}
- if (desc == STDIN_FILENO)
- {
- off_t required_offset = outleft ? bufoffset : after_last_match;
- if (required_offset != bufoffset
- && lseek (desc, required_offset, SEEK_SET) < 0
- && S_ISREG (st.st_mode))
- suppressible_error (filename, errno);
- }
-
closeout:
if (desc != STDIN_FILENO && close (desc) != 0)
suppressible_error (filename, errno);
@@ -2699,6 +2729,7 @@ main (int argc, char **argv)
if (show_help)
usage (EXIT_SUCCESS);
+ bool dev_null_output = false;
bool possibly_tty = false;
struct stat tmp_stat;
if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
@@ -2710,7 +2741,7 @@ main (int argc, char **argv)
struct stat null_stat;
if (stat ("/dev/null", &null_stat) == 0
&& SAME_INODE (tmp_stat, null_stat))
- exit_on_match = true;
+ dev_null_output = true;
else
possibly_tty = true;
}
@@ -2733,9 +2764,9 @@ main (int argc, char **argv)
/* POSIX says -c, -l and -q are mutually exclusive. In this
implementation, -q overrides -l and -L, which in turn override -c. */
- if (exit_on_match)
+ if (exit_on_match | dev_null_output)
list_files = LISTFILES_NONE;
- if (exit_on_match || list_files != LISTFILES_NONE)
+ if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
{
count_matches = false;
done_on_match = true;
diff --git a/tests/grep-dev-null-out b/tests/grep-dev-null-out
index 7f0e1c5..16ddadf 100755
--- a/tests/grep-dev-null-out
+++ b/tests/grep-dev-null-out
@@ -6,6 +6,7 @@
require_timeout_
${AWK-awk} 'BEGIN {while (1) print "x"}' </dev/null |
- timeout 10 grep x >/dev/null || fail=1
+ timeout 1 grep x >/dev/null
+test $? -eq 124 || fail=1
Exit $fail