summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2016-12-23 20:43:46 (GMT)
committer Paul Eggert <eggert@cs.ucla.edu> 2016-12-24 01:22:54 (GMT)
commit 192d61e2828e13c4a2f1a81cd128721a229c88f9 (patch)
tree 835bafdaac9d9986e9fcac255d54c828763e1164
parent 4fa1971d98c79b56b466eff57117351dc395ee2a (diff)
download grep-192d61e2828e13c4a2f1a81cd128721a229c88f9.zip
grep-192d61e2828e13c4a2f1a81cd128721a229c88f9.tar.gz
grep-192d61e2828e13c4a2f1a81cd128721a229c88f9.tar.bz2
grep: speed up -wf in C locale
Problem reported by Norihiro Tanaka (Bug#22357#100). This patch improves the performance on that benchmark on my platform so that grep is now only about 2x slower than grep 2.26, which means it is considerably faster than grep 2.25 and earlier.

* src/kwsearch.c (Fexecute): Use wordchars_size to boost performance for this case.
* src/search.h, src/searchutils.c (wordchars_size): New function.
-rw-r--r-- src/kwsearch.c 6
-rw-r--r-- src/search.h 1
-rw-r--r-- src/searchutils.c 9
3 files changed, 16 insertions, 0 deletions
diff --git a/src/kwsearch.c b/src/kwsearch.c
index b30dfd0..6005b60 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -150,6 +150,12 @@ Fexecute (char const *buf, size_t size, size_t *match_size,
break;
len = kwsmatch.size[0];
}
+
+ /* No word match was found at BEG. Skip past word constituents,
+ since they cannot precede the next match and not skipping
+ them could make things much slower. */
+ beg += wordchars_size (beg, buf + size);
+ mb_start = beg;
} /* for (beg in buf) */
return -1;
diff --git a/src/search.h b/src/search.h
index 6fe1797..1def4d6 100644
--- a/src/search.h
+++ b/src/search.h
@@ -48,6 +48,7 @@ typedef signed char mb_len_map_t;
/* searchutils.c */
extern void wordinit (void);
extern kwset_t kwsinit (bool);
+extern size_t wordchars_size (char const *, char const *);
extern size_t wordchar_next (char const *, char const *);
extern bool wordchar_prev (char const *, char const *, char const *);
extern ptrdiff_t mb_goback (char const **, char const *, char const *);
diff --git a/src/searchutils.c b/src/searchutils.c
index e0a1db3..6f6ae0b 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -146,6 +146,15 @@ wordchars_count (char const *buf, char const *end, bool countall)
return n;
}
+/* Examine the start of BUF for the longest prefix containing just
+ word constituents. Return the total number of bytes in the prefix.
+ The buffer ends at END. */
+size_t
+wordchars_size (char const *buf, char const *end)
+{
+ return wordchars_count (buf, end, true);
+}
+
/* If BUF starts with a word constituent, return the number of bytes
used to represent it; otherwise, return zero. The buffer ends at END. */
size_t