summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2017-03-20 00:22:34 (GMT)
committer: Pádraig Brady <P@draigBrady.com> 2017-03-26 22:20:26 (GMT)
commit: 0839e6d8d9371b7301133c6a2799bab1c906d2e1 (patch)
tree: 68a93325d6e09faf4b0bcbf440ee83980c31cffa
parent: a79dbb97bfb2fff7905221f2437da19472b9ba23 (diff)
download: coreutils-0839e6d8d9371b7301133c6a2799bab1c906d2e1.zip
coreutils-0839e6d8d9371b7301133c6a2799bab1c906d2e1.tar.gz
coreutils-0839e6d8d9371b7301133c6a2799bab1c906d2e1.tar.bz2
split: process more efficiently when filters exit early
* src/split.c (bytes_split): Don't write to an existing filter
if it has exited.  When filters exit early, skip input data if
possible.  Refactor out 2 redundant variables.
* tests/split/filter.sh: Improve test coverage given the new more
efficient processing.  Also use a 10TB file to expand the file
systems tested on.
-rw-r--r-- src/split.c 37
-rwxr-xr-x tests/split/filter.sh 33
2 files changed, 45 insertions, 25 deletions
diff --git a/src/split.c b/src/split.c
index 85bc052..01f97af 100644
--- a/src/split.c
+++ b/src/split.c
@@ -623,6 +623,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
{
size_t n_read;
bool new_file_flag = true;
+ bool filter_ok = true;
uintmax_t to_write = n_bytes;
uintmax_t opened = 0;
bool eof;
@@ -637,42 +638,48 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
}
else
{
+ if (! filter_ok
+ && lseek (STDIN_FILENO, to_write, SEEK_CUR) != -1)
+ {
+ to_write = n_bytes;
+ new_file_flag = true;
+ }
+
n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
die (EXIT_FAILURE, errno, "%s", quotef (infile));
eof = n_read == 0;
}
char *bp_out = buf;
- size_t to_read = n_read;
- while (to_write <= to_read)
+ while (to_write <= n_read)
{
- size_t w = to_write;
- bool cwrite_ok = cwrite (new_file_flag, bp_out, w);
+ if (filter_ok || new_file_flag)
+ filter_ok = cwrite (new_file_flag, bp_out, to_write);
opened += new_file_flag;
new_file_flag = !max_files || (opened < max_files);
- if (!new_file_flag && !cwrite_ok)
+ if (! filter_ok && ! new_file_flag)
{
- /* If filter no longer accepting input, stop reading. */
- n_read = to_read = 0;
+ /* If filters no longer accepting input, stop reading. */
+ n_read = 0;
eof = true;
break;
}
- bp_out += w;
- to_read -= w;
+ bp_out += to_write;
+ n_read -= to_write;
to_write = n_bytes;
}
- if (to_read != 0)
+ if (n_read != 0)
{
- bool cwrite_ok = cwrite (new_file_flag, bp_out, to_read);
+ if (filter_ok || new_file_flag)
+ filter_ok = cwrite (new_file_flag, bp_out, n_read);
opened += new_file_flag;
- to_write -= to_read;
new_file_flag = false;
- if (!cwrite_ok && opened == max_files)
+ if (! filter_ok && opened == max_files)
{
- /* If filter no longer accepting input, stop reading. */
- n_read = 0;
+ /* If filters no longer accepting input, stop reading. */
break;
}
+ to_write -= n_read;
}
}
while (! eof);
diff --git a/tests/split/filter.sh b/tests/split/filter.sh
index a85093c..a703b3b 100755
--- a/tests/split/filter.sh
+++ b/tests/split/filter.sh
@@ -18,8 +18,7 @@
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ split
-require_sparse_support_ # for 'truncate --size=$OFF_T_MAX'
-eval $(getlimits) # for OFF_T limits
+require_sparse_support_ # for 'truncate --size=$LARGE'
xz --version || skip_ "xz (better than gzip/bzip2) required"
for total_n_lines in 5 3000 20000; do
@@ -52,15 +51,29 @@ returns_ 1 split -n 1/2 --filter='true' /dev/null 2>&1 || fail=1
# where they would result in a non zero exit from split.
yes | head -n200K | split -b1G --filter='head -c1 >/dev/null' || fail=1
-# Do not use a size of OFF_T_MAX, since split.c applies a GNU/Hurd
-# /dev/zero workaround for files of that size. Use one less:
-N=$(expr $OFF_T_MAX - 1)
-
# Ensure that "endless" input is ignored when all filters finish
-timeout 10 sh -c 'yes | split --filter="head -c1 >/dev/null" -n r/1' || fail=1
-if truncate -s$N zero.in; then
- timeout 10 sh -c 'split --filter="head -c1 >/dev/null" -n 1 zero.in' || fail=1
-fi
+for mode in '' 'r/'; do
+  FILE='-'
+  if test "$mode" = ''; then
+    FILE='zero.in'
+    truncate -s10T "$FILE" || continue
+  fi
+  for N in 1 2; do
+    rm -f x??.n || framework_failure_
+    timeout 10 sh -c \
+      "yes | split --filter='head -c1 >\$FILE.n' -n $mode$N $FILE" || fail=1
+    # Also ensure we get appropriate output from each filter
+    seq 1 $N | tr '0-9' 1 > stat.exp
+    stat -c%s x??.n > stat.out || framework_failure_
+    compare stat.exp stat.out || fail=1
+  done
+done
+
+# Ensure that "endless" input _is_ processed for unbounded number of filters
+for buf in 1000 1000000; do
+ returns_ 124 timeout .5 sh -c \
+ "yes | split --filter='head -c1 >/dev/null' -b $buf" || fail=1
+done
# Ensure that "endless" input _is_ processed for unbounded number of filters
for buf in 1000 1000000; do